diff --git a/setup.py b/setup.py index a85731db936..ba510d57c2b 100644 --- a/setup.py +++ b/setup.py @@ -14,12 +14,17 @@ import struct import subprocess import sys +import tempfile import warnings from collections.abc import Iterator -from typing import Any +from typing import TYPE_CHECKING, Any from setuptools import Extension, setup from setuptools.command.build_ext import build_ext +from setuptools.errors import CompileError + +if TYPE_CHECKING: + import distutils.ccompiler def get_version() -> str: @@ -292,6 +297,47 @@ def _pkg_config(name: str) -> tuple[list[str], list[str]] | None: return None +def _try_compile(compiler: distutils.ccompiler.CCompiler, code: str) -> bool: + try: + with tempfile.TemporaryDirectory() as d: + fn = os.path.join(d, "test.c") + with open(fn, "w") as f: + f.write(code) + compiler.compile([fn], output_dir=d, extra_preargs=["-Werror"]) + return True + except CompileError: + return False + + +def _try_compile_attr(compiler: distutils.ccompiler.CCompiler, attr: str) -> bool: + code = f""" + #pragma GCC diagnostic error "-Wattributes" + #pragma clang diagnostic error "-Wattributes" + + int {attr} foo; + int main() {{ + return 0; + }} + """ + + return _try_compile(compiler, code) + + +def _try_compile_tls_define_macros( + compiler: distutils.ccompiler.CCompiler, +) -> list[tuple[str, str | None]]: + if _try_compile_attr(compiler, "thread_local"): # C23 + return [("HAVE_THREAD_LOCAL", None)] + elif _try_compile_attr(compiler, "_Thread_local"): # C11/C17 + return [("HAVE__THREAD_LOCAL", None)] + elif _try_compile_attr(compiler, "__thread"): # GCC/clang + return [("HAVE___THREAD", None)] + elif _try_compile_attr(compiler, "__declspec(thread)"): # MSVC + return [("HAVE___DECLSPEC_THREAD_", None)] + else: + return [] + + class pil_build_ext(build_ext): class ext_feature: features = [ @@ -426,13 +472,14 @@ def finalize_options(self) -> None: def _update_extension( self, name: str, - libraries: list[str] | list[str | bool | None], + libraries: list[str] | list[str | bool | None] | None = None, define_macros: list[tuple[str, str | None]] | None = None, sources: list[str] | None = None, ) -> None: for extension in self.extensions: if extension.name == name: - extension.libraries += libraries + if libraries is not None: + extension.libraries += libraries if define_macros is not None: extension.define_macros += define_macros if sources is not None: @@ -890,7 +937,10 @@ def build_extensions(self) -> None: defs.append(("PILLOW_VERSION", f'"{PILLOW_VERSION}"')) - self._update_extension("PIL._imaging", libs, defs) + tls_define_macros = _try_compile_tls_define_macros(self.compiler) + self._update_extension("PIL._imaging", libs, defs + tls_define_macros) + self._update_extension("PIL._imagingmath", define_macros=tls_define_macros) + self._update_extension("PIL._imagingmorph", define_macros=tls_define_macros) # # additional libraries @@ -913,7 +963,9 @@ def build_extensions(self) -> None: libs.append(feature.get("fribidi")) else: # building FriBiDi shim from src/thirdparty srcs.append("src/thirdparty/fribidi-shim/fribidi.c") - self._update_extension("PIL._imagingft", libs, defs, srcs) + self._update_extension( + "PIL._imagingft", libs, defs + tls_define_macros, srcs + ) else: self._remove_extension("PIL._imagingft") @@ -922,19 +974,19 @@ def build_extensions(self) -> None: libs = [feature.get("lcms")] if sys.platform == "win32": libs.extend(["user32", "gdi32"]) - self._update_extension("PIL._imagingcms", libs) + self._update_extension("PIL._imagingcms", libs, tls_define_macros) else: self._remove_extension("PIL._imagingcms") webp = feature.get("webp") if isinstance(webp, str): libs = [webp, webp + "mux", webp + "demux"] - self._update_extension("PIL._webp", libs) + self._update_extension("PIL._webp", libs, tls_define_macros) else: self._remove_extension("PIL._webp") tk_libs = ["psapi"] if sys.platform in ("win32", "cygwin") else [] - self._update_extension("PIL._imagingtk", tk_libs) + self._update_extension("PIL._imagingtk", tk_libs, tls_define_macros) build_ext.build_extensions(self) diff --git a/src/_imaging.c b/src/_imaging.c index 2fd2deffbe6..8aadad915c1 100644 --- a/src/_imaging.c +++ b/src/_imaging.c @@ -3912,34 +3912,49 @@ _get_stats(PyObject *self, PyObject *args) { return NULL; } - MUTEX_LOCK(&ImagingDefaultArena.mutex); - ImagingMemoryArena arena = &ImagingDefaultArena; - - v = PyLong_FromLong(arena->stats_new_count); + long stats_new_count = 0; + long stats_allocated_blocks = 0; + long stats_reused_blocks = 0; + long stats_reallocated_blocks = 0; + long stats_freed_blocks = 0; + long blocks_cached = 0; + + ImagingMemoryArena arena; + IMAGING_ARENAS_FOREACH(arena) { + MUTEX_LOCK(&arena->mutex); + stats_new_count += arena->stats_new_count; + stats_allocated_blocks += arena->stats_allocated_blocks; + stats_reused_blocks += arena->stats_reused_blocks; + stats_reallocated_blocks += arena->stats_reallocated_blocks; + stats_freed_blocks += arena->stats_freed_blocks; + blocks_cached += arena->blocks_cached; + MUTEX_UNLOCK(&arena->mutex); + } + + v = PyLong_FromLong(stats_new_count); PyDict_SetItemString(d, "new_count", v ? v : Py_None); Py_XDECREF(v); - v = PyLong_FromLong(arena->stats_allocated_blocks); + v = PyLong_FromLong(stats_allocated_blocks); PyDict_SetItemString(d, "allocated_blocks", v ? v : Py_None); Py_XDECREF(v); - v = PyLong_FromLong(arena->stats_reused_blocks); + v = PyLong_FromLong(stats_reused_blocks); PyDict_SetItemString(d, "reused_blocks", v ? v : Py_None); Py_XDECREF(v); - v = PyLong_FromLong(arena->stats_reallocated_blocks); + v = PyLong_FromLong(stats_reallocated_blocks); PyDict_SetItemString(d, "reallocated_blocks", v ? v : Py_None); Py_XDECREF(v); - v = PyLong_FromLong(arena->stats_freed_blocks); + v = PyLong_FromLong(stats_freed_blocks); PyDict_SetItemString(d, "freed_blocks", v ? v : Py_None); Py_XDECREF(v); - v = PyLong_FromLong(arena->blocks_cached); + v = PyLong_FromLong(blocks_cached); PyDict_SetItemString(d, "blocks_cached", v ? v : Py_None); Py_XDECREF(v); - MUTEX_UNLOCK(&ImagingDefaultArena.mutex); return d; } @@ -3949,14 +3964,16 @@ _reset_stats(PyObject *self, PyObject *args) { return NULL; } - MUTEX_LOCK(&ImagingDefaultArena.mutex); - ImagingMemoryArena arena = &ImagingDefaultArena; - arena->stats_new_count = 0; - arena->stats_allocated_blocks = 0; - arena->stats_reused_blocks = 0; - arena->stats_reallocated_blocks = 0; - arena->stats_freed_blocks = 0; - MUTEX_UNLOCK(&ImagingDefaultArena.mutex); + ImagingMemoryArena arena; + IMAGING_ARENAS_FOREACH(arena) { + MUTEX_LOCK(&arena->mutex); + arena->stats_new_count = 0; + arena->stats_allocated_blocks = 0; + arena->stats_reused_blocks = 0; + arena->stats_reallocated_blocks = 0; + arena->stats_freed_blocks = 0; + MUTEX_UNLOCK(&arena->mutex); + } Py_RETURN_NONE; } @@ -3967,9 +3984,10 @@ _get_alignment(PyObject *self, PyObject *args) { return NULL; } - MUTEX_LOCK(&ImagingDefaultArena.mutex); - int alignment = ImagingDefaultArena.alignment; - MUTEX_UNLOCK(&ImagingDefaultArena.mutex); + ImagingMemoryArena arena = ImagingGetArena(); + MUTEX_LOCK(&arena->mutex); + int alignment = arena->alignment; + MUTEX_UNLOCK(&arena->mutex); return PyLong_FromLong(alignment); } @@ -3979,9 +3997,10 @@ _get_block_size(PyObject *self, PyObject *args) { return NULL; } - MUTEX_LOCK(&ImagingDefaultArena.mutex); - int block_size = ImagingDefaultArena.block_size; - MUTEX_UNLOCK(&ImagingDefaultArena.mutex); + ImagingMemoryArena arena = ImagingGetArena(); + MUTEX_LOCK(&arena->mutex); + int block_size = arena->block_size; + MUTEX_UNLOCK(&arena->mutex); return PyLong_FromLong(block_size); } @@ -3991,9 +4010,10 @@ _get_blocks_max(PyObject *self, PyObject *args) { return NULL; } - MUTEX_LOCK(&ImagingDefaultArena.mutex); - int blocks_max = ImagingDefaultArena.blocks_max; - MUTEX_UNLOCK(&ImagingDefaultArena.mutex); + ImagingMemoryArena arena = ImagingGetArena(); + MUTEX_LOCK(&arena->mutex); + int blocks_max = arena->blocks_max; + MUTEX_UNLOCK(&arena->mutex); return PyLong_FromLong(blocks_max); } @@ -4014,9 +4034,12 @@ _set_alignment(PyObject *self, PyObject *args) { return NULL; } - MUTEX_LOCK(&ImagingDefaultArena.mutex); - ImagingDefaultArena.alignment = alignment; - MUTEX_UNLOCK(&ImagingDefaultArena.mutex); + ImagingMemoryArena arena; + IMAGING_ARENAS_FOREACH(arena) { + MUTEX_LOCK(&arena->mutex); + arena->alignment = alignment; + MUTEX_UNLOCK(&arena->mutex); + } Py_RETURN_NONE; } @@ -4038,9 +4061,12 @@ _set_block_size(PyObject *self, PyObject *args) { return NULL; } - MUTEX_LOCK(&ImagingDefaultArena.mutex); - ImagingDefaultArena.block_size = block_size; - MUTEX_UNLOCK(&ImagingDefaultArena.mutex); + ImagingMemoryArena arena; + IMAGING_ARENAS_FOREACH(arena) { + MUTEX_LOCK(&arena->mutex); + arena->block_size = block_size; + MUTEX_UNLOCK(&arena->mutex); + } Py_RETURN_NONE; } @@ -4058,15 +4084,20 @@ _set_blocks_max(PyObject *self, PyObject *args) { } if ((unsigned long)blocks_max > - SIZE_MAX / sizeof(ImagingDefaultArena.blocks_pool[0])) { + SIZE_MAX / sizeof(ImagingGetArena()->blocks_pool[0])) { PyErr_SetString(PyExc_ValueError, "blocks_max is too large"); return NULL; } - MUTEX_LOCK(&ImagingDefaultArena.mutex); - int status = ImagingMemorySetBlocksMax(&ImagingDefaultArena, blocks_max); - MUTEX_UNLOCK(&ImagingDefaultArena.mutex); - if (!status) { + int error = 0; + ImagingMemoryArena arena; + IMAGING_ARENAS_FOREACH(arena) { + MUTEX_LOCK(&arena->mutex); + error |= ImagingMemorySetBlocksMax(arena, blocks_max); + MUTEX_UNLOCK(&arena->mutex); + } + + if (error) { return ImagingError_MemoryError(); } @@ -4081,9 +4112,12 @@ _clear_cache(PyObject *self, PyObject *args) { return NULL; } - MUTEX_LOCK(&ImagingDefaultArena.mutex); - ImagingMemoryClearCache(&ImagingDefaultArena, i); - MUTEX_UNLOCK(&ImagingDefaultArena.mutex); + ImagingMemoryArena arena; + IMAGING_ARENAS_FOREACH(arena) { + MUTEX_LOCK(&arena->mutex); + ImagingMemoryClearCache(arena, i); + MUTEX_UNLOCK(&arena->mutex); + } Py_RETURN_NONE; } diff --git a/src/libImaging/Imaging.h b/src/libImaging/Imaging.h index 0c2d3fc2e34..6bb93d5403f 100644 --- a/src/libImaging/Imaging.h +++ b/src/libImaging/Imaging.h @@ -51,6 +51,20 @@ extern "C" { * extensions, see http://www.effbot.org/zone/pil-extending.htm */ +#ifdef Py_GIL_DISABLED +#if defined(__cplusplus) +#define IMAGING_TLS thread_local +#elif defined(HAVE_THREAD_LOCAL) +#define IMAGING_TLS thread_local +#elif defined(HAVE__THREAD_LOCAL) +#define IMAGING_TLS _Thread_local +#elif defined(HAVE___THREAD) +#define IMAGING_TLS __thread +#elif defined(HAVE___DECLSPEC_THREAD_) +#define IMAGING_TLS __declspec(thread) +#endif +#endif + /* Handles */ typedef struct ImagingMemoryInstance *Imaging; @@ -104,6 +118,10 @@ struct ImagingMemoryInstance { /* Virtual methods */ void (*destroy)(Imaging im); + +#ifdef IMAGING_TLS + int arenaindex; /* Index of the arena this image is associated with. */ +#endif }; #define IMAGING_PIXEL_1(im, x, y) ((im)->image8[(y)][(x)]) @@ -161,6 +179,9 @@ typedef struct ImagingMemoryArena { int stats_reallocated_blocks; /* Number of blocks which were actually reallocated after retrieving */ int stats_freed_blocks; /* Number of freed blocks */ +#ifdef IMAGING_TLS + int index; /* Index of the arena in the global array. */ +#endif #ifdef Py_GIL_DISABLED PyMutex mutex; #endif @@ -169,7 +190,35 @@ typedef struct ImagingMemoryArena { /* Objects */ /* ------- */ +#ifdef IMAGING_TLS +/* In this case we both do not have the GIL and have thread-local storage, so we + * will allocate a set of arenas and associated them with threads one at a time. + */ +#define IMAGING_ARENAS_COUNT 8 +extern struct ImagingMemoryArena ImagingArenas[IMAGING_ARENAS_COUNT]; + +/* Provide a macro that loops through each arena that has been + * statically-allocated. This is necessary to properly handle stats. + */ +#define IMAGING_ARENAS_FOREACH(arena) \ + for ((arena) = &ImagingArenas[0]; arena->block_size; ++(arena)) +#else +/* In this case we either have the GIL or do not have thread-local storage, in + * which case we will only allocate a single arena. + */ extern struct ImagingMemoryArena ImagingDefaultArena; + +/* Provide a macro that loops through each arena that has been + * statically-allocated. In this case because there is only one, this is + * effectively a single block of code. + */ +#define IMAGING_ARENAS_FOREACH(arena) \ + for ((arena) = &ImagingDefaultArena; (arena); (arena) = NULL) +#endif + +ImagingMemoryArena +ImagingGetArena(void); + extern int ImagingMemorySetBlocksMax(ImagingMemoryArena arena, int blocks_max); extern void diff --git a/src/libImaging/Storage.c b/src/libImaging/Storage.c index 522e9f37557..16f07816a9a 100644 --- a/src/libImaging/Storage.c +++ b/src/libImaging/Storage.c @@ -218,9 +218,10 @@ ImagingNewPrologueSubtype(const char *mode, int xsize, int ysize, int size) { break; } - MUTEX_LOCK(&ImagingDefaultArena.mutex); - ImagingDefaultArena.stats_new_count += 1; - MUTEX_UNLOCK(&ImagingDefaultArena.mutex); + ImagingMemoryArena arena = ImagingGetArena(); + MUTEX_LOCK(&arena->mutex); + arena->stats_new_count += 1; + MUTEX_UNLOCK(&arena->mutex); return im; } @@ -258,23 +259,107 @@ ImagingDelete(Imaging im) { /* Allocate image as an array of line buffers. */ #define IMAGING_PAGE_SIZE (4096) +#define IMAGING_ARENA_BLOCK_SIZE (16 * 1024 * 1024) +#ifdef IMAGING_TLS +/* This is the overall process-level index that keeps track of the next index + * that will be assigned to a thread. + */ +static uint64_t ImagingArenaIndex = 0; + +/* This is the thread-local index that associated a thread with an arena in the + * statically-allocated list. + */ +static IMAGING_TLS uint64_t ImagingArenaThreadIndex = UINT64_MAX; + +/* These are the statically-allocated arenas. */ +struct ImagingMemoryArena ImagingArenas[IMAGING_ARENAS_COUNT] = { + {1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 0, {0}}, + {1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 1, {0}}, + {1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 2, {0}}, + {1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 3, {0}}, + {1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 4, {0}}, + {1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 5, {0}}, + {1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 6, {0}}, + {1, IMAGING_ARENA_BLOCK_SIZE, 0, 0, NULL, 0, 0, 0, 0, 0, 7, {0}}, + {0} +}; + +/* Get a pointer to the correct arena for this context. In this case where we + * are using a round-robin approach to the statically allocated arenas, we will + * return the arena that is assigned to the thread on first use. + */ +ImagingMemoryArena +ImagingGetArena(void) { + if (ImagingArenaThreadIndex == UINT64_MAX) { + ImagingArenaThreadIndex = + _Py_atomic_add_uint64(&ImagingArenaIndex, 1) % IMAGING_ARENAS_COUNT; + } + return &ImagingArenas[ImagingArenaThreadIndex]; +} + +/* Return the arena associated with the given image. In this case the index of + * the arena is stored on the image itself. + */ +ImagingMemoryArena +ImagingGetArenaFromImaging(Imaging im) { + int arenaindex = im->arenaindex; + assert(arenaindex >= 0 && arenaindex < IMAGING_ARENAS_COUNT); + return &ImagingArenas[arenaindex]; +} + +/* Set the arena index on the given image based on the index of the arena. This + * is necessary in order to return the blocks to the correct arena when the + * image is destroyed. + */ +static void +ImagingSetArenaOnImaging(Imaging im, ImagingMemoryArena arena) { + im->arenaindex = arena->index; +} +#else +/* Because we have the GIL (or do not have thread-local storage), we only have a + * single arena. + */ struct ImagingMemoryArena ImagingDefaultArena = { - 1, // alignment - 16 * 1024 * 1024, // block_size - 0, // blocks_max - 0, // blocks_cached - NULL, // blocks_pool + 1, // alignment + IMAGING_ARENA_BLOCK_SIZE, // block_size + 0, // blocks_max + 0, // blocks_cached + NULL, // blocks_pool 0, 0, 0, 0, 0, // Stats #ifdef Py_GIL_DISABLED + /* On the very off-chance that someone is running free-threaded Python on a + * platform that does not support thread-local storage, we need a mutex + * here. + */ {0}, #endif }; +/* Get a pointer to the correct arena for this context. In this case where we + * either have the GIL or we do not have TLS, we will return only the default + * arena. + */ +ImagingMemoryArena +ImagingGetArena(void) { + return &ImagingDefaultArena; +} + +/* Return the arena associated with the given image. In this case because we + * only have one arena, we always return the default arena. + */ +#define ImagingGetArenaFromImaging(im) &ImagingDefaultArena + +/* Set the arena index on the given image based on the index of the arena. In + * this case because we only have one arena, we do not need to do anything. + */ +#define ImagingSetArenaOnImaging(im, arena) +#endif + int ImagingMemorySetBlocksMax(ImagingMemoryArena arena, int blocks_max) { void *p; @@ -288,18 +373,18 @@ ImagingMemorySetBlocksMax(ImagingMemoryArena arena, int blocks_max) { p = realloc(arena->blocks_pool, sizeof(*arena->blocks_pool) * blocks_max); if (!p) { // Leave previous blocks_max value - return 0; + return 1; } arena->blocks_pool = p; } else { arena->blocks_pool = calloc(sizeof(*arena->blocks_pool), blocks_max); if (!arena->blocks_pool) { - return 0; + return 1; } } arena->blocks_max = blocks_max; - return 1; + return 0; } void @@ -369,12 +454,13 @@ ImagingDestroyArray(Imaging im) { int y = 0; if (im->blocks) { - MUTEX_LOCK(&ImagingDefaultArena.mutex); + ImagingMemoryArena arena = ImagingGetArenaFromImaging(im); + MUTEX_LOCK(&arena->mutex); while (im->blocks[y].ptr) { - memory_return_block(&ImagingDefaultArena, im->blocks[y]); + memory_return_block(arena, im->blocks[y]); y += 1; } - MUTEX_UNLOCK(&ImagingDefaultArena.mutex); + MUTEX_UNLOCK(&arena->mutex); free(im->blocks); } } @@ -504,11 +590,12 @@ ImagingNewInternal(const char *mode, int xsize, int ysize, int dirty) { return NULL; } - MUTEX_LOCK(&ImagingDefaultArena.mutex); - Imaging tmp = ImagingAllocateArray( - im, &ImagingDefaultArena, dirty, ImagingDefaultArena.block_size - ); - MUTEX_UNLOCK(&ImagingDefaultArena.mutex); + ImagingMemoryArena arena = ImagingGetArena(); + ImagingSetArenaOnImaging(im, arena); + + MUTEX_LOCK(&arena->mutex); + Imaging tmp = ImagingAllocateArray(im, arena, dirty, arena->block_size); + MUTEX_UNLOCK(&arena->mutex); if (tmp) { return im; } @@ -516,9 +603,9 @@ ImagingNewInternal(const char *mode, int xsize, int ysize, int dirty) { ImagingError_Clear(); // Try to allocate the image once more with smallest possible block size - MUTEX_LOCK(&ImagingDefaultArena.mutex); - tmp = ImagingAllocateArray(im, &ImagingDefaultArena, dirty, IMAGING_PAGE_SIZE); - MUTEX_UNLOCK(&ImagingDefaultArena.mutex); + MUTEX_LOCK(&arena->mutex); + tmp = ImagingAllocateArray(im, arena, dirty, IMAGING_PAGE_SIZE); + MUTEX_UNLOCK(&arena->mutex); if (tmp) { return im; }