From abf63dc53715ed941bf14c9399228b56d1fc56ca Mon Sep 17 00:00:00 2001 From: Kamil Madejski Date: Tue, 24 Sep 2019 12:33:54 +0200 Subject: [PATCH 1/4] EZP-30296: Introduced bulk document deleting using deleteByQuery --- lib/Handler.php | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/lib/Handler.php b/lib/Handler.php index 977fe4e60..37ea53525 100644 --- a/lib/Handler.php +++ b/lib/Handler.php @@ -45,6 +45,8 @@ */ class Handler implements SearchHandlerInterface, Capable { + const SOLR_BULK_REMOVE_LIMIT = 1000; + /** * Content locator gateway. * @@ -340,9 +342,15 @@ protected function deleteAllItemsWithoutAdditionalLocation($locationId) $this->gateway->searchAllEndpoints($query) ); + $contentDocumentIds = []; + foreach ($searchResult->searchHits as $hit) { - $idPrefix = $this->mapper->generateContentDocumentId($hit->valueObject->id); - $this->gateway->deleteByQuery("_root_:{$idPrefix}*"); + $contentDocumentIds[] = $this->mapper->generateContentDocumentId($hit->valueObject->id) . '*'; + } + + foreach (\array_chunk($contentDocumentIds, self::SOLR_BULK_REMOVE_LIMIT) as $ids) { + $query = '_root_:(' . implode(' OR ', $ids) . ')'; + $this->gateway->deleteByQuery($query); } } From 18c9c5e3b3c527a16593317874718b59b35d6530 Mon Sep 17 00:00:00 2001 From: Kamil Madejski Date: Fri, 27 Sep 2019 15:13:24 +0200 Subject: [PATCH 2/4] Updated composer.json to prevent installing ezpublish-kernel v7.5.x --- composer.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/composer.json b/composer.json index 1d4b6ae8c..6ef49ba99 100644 --- a/composer.json +++ b/composer.json @@ -12,7 +12,7 @@ ], "require": { "php": "~5.6|~7.0", - "ezsystems/ezpublish-kernel": "~6.7.10@dev|^6.13.6@dev|~7.3.5@dev|^7.4.3@dev", + "ezsystems/ezpublish-kernel": "~6.7.10@dev|^6.13.6@dev|~7.3.5@dev|~7.4.3@dev", "netgen/query-translator": "^1.0" }, "require-dev": { From 4a99ddf65fadb13e652bc8dafd7faf58237e81ff Mon Sep 17 00:00:00 2001 From: Kamil Madejski Date: Wed, 2 Oct 2019 17:12:21 +0200 Subject: [PATCH 3/4] Updated query limits --- lib/Handler.php | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/lib/Handler.php b/lib/Handler.php index 37ea53525..b044ed910 100644 --- a/lib/Handler.php +++ b/lib/Handler.php @@ -45,7 +45,11 @@ */ class Handler implements SearchHandlerInterface, Capable { + /* Solr's maxBooleanClauses config value is 1024 */ const SOLR_BULK_REMOVE_LIMIT = 1000; + /* 32b max integer value due to Solr (JVM) limitations */ + const SOLR_MAX_QUERY_LIMIT = PHP_INT_MAX >> 32; + const DEFAULT_QUERY_LIMIT = 1000; /** * Content locator gateway. @@ -330,7 +334,7 @@ public function commit($flush = false) */ protected function deleteAllItemsWithoutAdditionalLocation($locationId) { - $query = $this->prepareQuery(); + $query = $this->prepareQuery(self::SOLR_MAX_QUERY_LIMIT); $query->filter = new Criterion\LogicalAnd( [ $this->allItemsWithinLocation($locationId), @@ -348,7 +352,7 @@ protected function deleteAllItemsWithoutAdditionalLocation($locationId) $contentDocumentIds[] = $this->mapper->generateContentDocumentId($hit->valueObject->id) . '*'; } - foreach (\array_chunk($contentDocumentIds, self::SOLR_BULK_REMOVE_LIMIT) as $ids) { + foreach (\array_chunk(\array_unique($contentDocumentIds), self::SOLR_BULK_REMOVE_LIMIT) as $ids) { $query = '_root_:(' . implode(' OR ', $ids) . ')'; $this->gateway->deleteByQuery($query); } @@ -359,7 +363,7 @@ protected function deleteAllItemsWithoutAdditionalLocation($locationId) */ protected function updateAllElementsWithAdditionalLocation($locationId) { - $query = $this->prepareQuery(); + $query = $this->prepareQuery(self::SOLR_MAX_QUERY_LIMIT); $query->filter = new Criterion\LogicalAnd( [ $this->allItemsWithinLocation($locationId), @@ -388,14 +392,16 @@ protected function updateAllElementsWithAdditionalLocation($locationId) /** * Prepare standard query for delete purpose. * + * @param int $limit + * * @return Query */ - protected function prepareQuery() + protected function prepareQuery($limit = self::DEFAULT_QUERY_LIMIT) { return new Query( [ 'query' => new Criterion\MatchAll(), - 'limit' => 1000, + 'limit' => $limit, 'offset' => 0, ] ); From 5c1f9dec512c8f30d6e89c2f9a04cbeaecdc6eb3 Mon Sep 17 00:00:00 2001 From: Kamil Madejski Date: Thu, 3 Oct 2019 10:51:27 +0200 Subject: [PATCH 4/4] Updated SOLR_MAX_QUERY_LIMIT value due to JVM limitations --- lib/Handler.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Handler.php b/lib/Handler.php index b044ed910..60204f61f 100644 --- a/lib/Handler.php +++ b/lib/Handler.php @@ -47,8 +47,8 @@ class Handler implements SearchHandlerInterface, Capable { /* Solr's maxBooleanClauses config value is 1024 */ const SOLR_BULK_REMOVE_LIMIT = 1000; - /* 32b max integer value due to Solr (JVM) limitations */ - const SOLR_MAX_QUERY_LIMIT = PHP_INT_MAX >> 32; + /* 16b max unsigned integer value due to Solr (JVM) limitations */ + const SOLR_MAX_QUERY_LIMIT = 65535; const DEFAULT_QUERY_LIMIT = 1000; /**