From 1fd57b27f0606fac681a48051f7f73ae1f210152 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20D=C4=99bi=C5=84ski?= <58430570+mateuszdebinski@users.noreply.github.com> Date: Wed, 20 Mar 2024 12:21:27 +0100 Subject: [PATCH] IBX-7149: Refactored content type-based indexing to rely on a dedicated strategy (#296) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For more details see https://issues.ibexa.co/browse/IBX-7149 and https://github.com/ibexa/core/pull/296 Key changes: * Fixed fetching batches of content IDs by a content type * Extracted content ID by content type fetching as a strategy * Introduced ContentIdBatchList result for ContentIdListGeneratorStrategy * Fixed ContentList iterator type hinting * [Tests] Aligned ContentFilteringTest after ContentList type hinting fix * [Tests] Added coverage for ContentType input generator strategy * [Tests] Added coverage for ContentIdBatchList result wrapper object --------- Co-authored-by: Andrew Longosz Co-authored-by: Paweł Niedzielski --- phpstan-baseline-7.4.neon | 5 - phpstan-baseline-8.0.neon | 5 - phpstan-baseline.neon | 32 +---- .../ContentTypeInputGeneratorStrategy.php | 79 ++++++++++++ ...ontentIdListGeneratorStrategyInterface.php | 22 ++++ src/bundle/Core/Command/ReindexCommand.php | 74 +++++------ src/bundle/Core/Resources/config/commands.yml | 6 +- .../Repository/Values/Content/ContentList.php | 11 +- src/lib/Search/Indexer/ContentIdBatchList.php | 49 +++++++ .../ContentTypeInputGeneratorStrategyTest.php | 115 +++++++++++++++++ .../Filtering/ContentFilteringTest.php | 8 +- .../Search/Indexer/ContentIdBatchListTest.php | 120 ++++++++++++++++++ 12 files changed, 433 insertions(+), 93 deletions(-) create mode 100644 src/bundle/Core/Command/Indexer/ContentIdList/ContentTypeInputGeneratorStrategy.php create mode 100644 src/bundle/Core/Command/Indexer/ContentIdListGeneratorStrategyInterface.php create mode 100644 src/lib/Search/Indexer/ContentIdBatchList.php create mode 100644 tests/bundle/Core/Command/Indexer/ContentIdList/ContentTypeInputGeneratorStrategyTest.php create mode 100644 tests/lib/Search/Indexer/ContentIdBatchListTest.php diff --git a/phpstan-baseline-7.4.neon b/phpstan-baseline-7.4.neon index aaf8fda7ec..f8a660b9fb 100644 --- a/phpstan-baseline-7.4.neon +++ b/phpstan-baseline-7.4.neon @@ -25,11 +25,6 @@ parameters: count: 1 path: src/bundle/Core/Command/CleanupVersionsCommand.php - - - message: "#^Parameter \\#1 \\$var of function count expects array\\|Countable, iterable\\&Traversable given\\.$#" - count: 1 - path: src/bundle/Core/Command/ExpireUserPasswordsCommand.php - - message: "#^Parameter \\#1 \\$fp of function fclose expects resource, resource\\|false given\\.$#" count: 1 diff --git a/phpstan-baseline-8.0.neon b/phpstan-baseline-8.0.neon index a9b6e51067..3e96991838 100644 --- a/phpstan-baseline-8.0.neon +++ b/phpstan-baseline-8.0.neon @@ -25,11 +25,6 @@ parameters: count: 1 path: src/bundle/Core/Command/CleanupVersionsCommand.php - - - message: "#^Parameter \\#1 \\$value of function count expects array\\|Countable, iterable\\&Traversable given\\.$#" - count: 1 - path: src/bundle/Core/Command/ExpireUserPasswordsCommand.php - - message: "#^Parameter \\#1 \\$stream of function fclose expects resource, resource\\|false given\\.$#" count: 1 diff --git a/phpstan-baseline.neon b/phpstan-baseline.neon index e6d4d33183..490b91030b 100644 --- a/phpstan-baseline.neon +++ b/phpstan-baseline.neon @@ -255,11 +255,6 @@ parameters: count: 1 path: src/bundle/Core/Command/RegenerateUrlAliasesCommand.php - - - message: "#^Access to an undefined property Ibexa\\\\Contracts\\\\Core\\\\Persistence\\\\Content\\\\Location\\\\Handler\\:\\:\\$pathString\\.$#" - count: 2 - path: src/bundle/Core/Command/ReindexCommand.php - - message: "#^Call to an undefined method Ibexa\\\\Core\\\\Search\\\\Common\\\\Indexer\\:\\:purge\\(\\)\\.$#" count: 1 @@ -270,21 +265,11 @@ parameters: count: 2 path: src/bundle/Core/Command/ReindexCommand.php - - - message: "#^Method Ibexa\\\\Bundle\\\\Core\\\\Command\\\\ReindexCommand\\:\\:__construct\\(\\) has parameter \\$searchIndexer with no type specified\\.$#" - count: 1 - path: src/bundle/Core/Command/ReindexCommand.php - - message: "#^Method Ibexa\\\\Bundle\\\\Core\\\\Command\\\\ReindexCommand\\:\\:configure\\(\\) has no return type specified\\.$#" count: 1 path: src/bundle/Core/Command/ReindexCommand.php - - - message: "#^Method Ibexa\\\\Bundle\\\\Core\\\\Command\\\\ReindexCommand\\:\\:getPhpProcess\\(\\) has parameter \\$contentIds with no value type specified in iterable type array\\.$#" - count: 1 - path: src/bundle/Core/Command/ReindexCommand.php - - message: "#^Method Ibexa\\\\Bundle\\\\Core\\\\Command\\\\ReindexCommand\\:\\:initialize\\(\\) has no return type specified\\.$#" count: 1 @@ -317,7 +302,7 @@ parameters: - message: "#^Property Ibexa\\\\Bundle\\\\Core\\\\Command\\\\ReindexCommand\\:\\:\\$phpPath \\(string\\) does not accept string\\|null\\.$#" - count: 2 + count: 1 path: src/bundle/Core/Command/ReindexCommand.php - @@ -6930,11 +6915,6 @@ parameters: count: 1 path: src/contracts/Repository/Iterator/BatchIteratorAdapter/ContentFilteringAdapter.php - - - message: "#^Method Ibexa\\\\Contracts\\\\Core\\\\Repository\\\\Iterator\\\\BatchIteratorAdapter\\\\ContentFilteringAdapter\\:\\:fetch\\(\\) should return Iterator but returns iterable\\&Traversable\\.$#" - count: 1 - path: src/contracts/Repository/Iterator/BatchIteratorAdapter/ContentFilteringAdapter.php - - message: "#^Method Ibexa\\\\Contracts\\\\Core\\\\Repository\\\\Iterator\\\\BatchIteratorAdapter\\\\LocationFilteringAdapter\\:\\:__construct\\(\\) has parameter \\$languages with no value type specified in iterable type array\\.$#" count: 1 @@ -7085,11 +7065,6 @@ parameters: count: 1 path: src/contracts/Repository/Values/Content/ContentList.php - - - message: "#^Method Ibexa\\\\Contracts\\\\Core\\\\Repository\\\\Values\\\\Content\\\\ContentList\\:\\:__construct\\(\\) has parameter \\$contentItems with no value type specified in iterable type array\\.$#" - count: 1 - path: src/contracts/Repository/Values/Content/ContentList.php - - message: "#^Method Ibexa\\\\Contracts\\\\Core\\\\Repository\\\\Values\\\\Content\\\\DraftList\\\\Item\\\\UnauthorizedContentDraftListItem\\:\\:__construct\\(\\) has parameter \\$payload with no value type specified in iterable type array\\.$#" count: 1 @@ -34345,11 +34320,6 @@ parameters: count: 1 path: tests/integration/Core/Repository/Filtering/ContentFilteringTest.php - - - message: "#^Cannot access offset 0 on iterable\\&Traversable\\.$#" - count: 1 - path: tests/integration/Core/Repository/Filtering/ContentFilteringTest.php - - message: "#^Cannot use array destructuring on iterable\\\\.$#" count: 1 diff --git a/src/bundle/Core/Command/Indexer/ContentIdList/ContentTypeInputGeneratorStrategy.php b/src/bundle/Core/Command/Indexer/ContentIdList/ContentTypeInputGeneratorStrategy.php new file mode 100644 index 0000000000..8b6faa8ccf --- /dev/null +++ b/src/bundle/Core/Command/Indexer/ContentIdList/ContentTypeInputGeneratorStrategy.php @@ -0,0 +1,79 @@ +contentService = $contentService; + } + + /** + * @throws \Ibexa\Contracts\Core\Repository\Exceptions\BadStateException + */ + public function getBatchList(InputInterface $input, int $batchSize): ContentIdBatchList + { + $contentList = $this->getContentList($input->getOption('content-type')); + + return new ContentIdBatchList( + $this->buildGenerator($contentList, $batchSize), + $contentList->getTotalCount(), + ); + } + + private function buildGenerator(ContentList $contentList, int $batchSize): Generator + { + $contentIds = []; + foreach ($contentList as $content) { + $contentIds[] = $content->getVersionInfo()->getContentInfo()->getId(); + if (count($contentIds) >= $batchSize) { + yield $contentIds; + $contentIds = []; + } + } + if (!empty($contentIds)) { + yield $contentIds; + } + } + + public function shouldPurgeIndex(InputInterface $input): bool + { + return false; + } + + /** + * @throws \Ibexa\Contracts\Core\Repository\Exceptions\BadStateException + */ + private function getContentList(string $contentTypeIdentifier): ContentList + { + $filter = new Filter(); + $filter + ->withCriterion( + new Query\Criterion\ContentTypeIdentifier($contentTypeIdentifier) + ) + ; + + return $this->contentService->find($filter); + } +} diff --git a/src/bundle/Core/Command/Indexer/ContentIdListGeneratorStrategyInterface.php b/src/bundle/Core/Command/Indexer/ContentIdListGeneratorStrategyInterface.php new file mode 100644 index 0000000000..2cbbb02e8e --- /dev/null +++ b/src/bundle/Core/Command/Indexer/ContentIdListGeneratorStrategyInterface.php @@ -0,0 +1,22 @@ +gateway = $gateway; @@ -82,8 +78,7 @@ public function __construct( $this->env = $env; $this->isDebug = $isDebug; $this->projectDir = $projectDir; - $this->contentService = $contentService; - $this->phpPath = $phpPath; + $this->contentIdListGeneratorStrategy = $contentIdListGeneratorStrategy; parent::__construct(); } @@ -261,28 +256,20 @@ protected function indexIncrementally( if ($since = $input->getOption('since')) { $count = $this->gateway->countContentSince(new DateTime($since)); - $generator = $this->gateway->getContentSince(new DateTime($since), $iterationCount); + $batchList = $this->gateway->getContentSince(new DateTime($since), $iterationCount); $purge = false; } elseif ($locationId = (int) $input->getOption('subtree')) { - /** @var \Ibexa\Contracts\Core\Persistence\Content\Location\Handler */ $location = $this->locationHandler->load($locationId); $count = $this->gateway->countContentInSubtree($location->pathString); - $generator = $this->gateway->getContentInSubtree($location->pathString, $iterationCount); - $purge = false; - } elseif ($contentType = $input->getOption('content-type')) { - $filter = new Filter(); - $filter - ->withCriterion( - new Query\Criterion\ContentTypeIdentifier($contentType) - ); - - $contentList = $this->contentService->find($filter); - $count = $contentList->getTotalCount(); - $generator = $this->fetchIterationFromContentList($contentList, $iterationCount); + $batchList = $this->gateway->getContentInSubtree($location->pathString, $iterationCount); $purge = false; + } elseif (!empty($input->getOption('content-type'))) { + $batchList = $this->contentIdListGeneratorStrategy->getBatchList($input, $iterationCount); + $count = $batchList->getCount(); + $purge = $this->contentIdListGeneratorStrategy->shouldPurgeIndex($input); } else { $count = $this->gateway->countAllContent(); - $generator = $this->gateway->getAllContent($iterationCount); + $batchList = $this->gateway->getAllContent($iterationCount); $purge = !$input->getOption('no-purge'); } @@ -318,13 +305,13 @@ protected function indexIncrementally( if ($processCount > 1) { $this->runParallelProcess( $progress, - $generator, + $batchList, (int)$processCount, $commit ); } else { // if we only have one process, or less iterations to warrant running several, we index it all inline - foreach ($generator as $contentIds) { + foreach ($batchList as $contentIds) { $this->searchIndexer->updateSearchIndex($contentIds, $commit); $progress->advance(1); } @@ -341,15 +328,28 @@ protected function indexIncrementally( } /** + * @param iterable> $results + * + * @return \Generator> + */ + private function buildGenerator(iterable $results): \Generator + { + yield from $results; + } + + /** + * @param iterable> $batchList + * * @throws \Ibexa\Contracts\Core\Repository\Exceptions\InvalidArgumentException */ private function runParallelProcess( ProgressBar $progress, - Generator $generator, + iterable $batchList, int $processCount, bool $commit ): void { - /** @var \Symfony\Component\Process\Process[]|null[] */ + $generator = $this->buildGenerator($batchList); + /** @var \Symfony\Component\Process\Process[]|null[] $processes */ $processes = array_fill(0, $processCount, null); do { /** @var \Symfony\Component\Process\Process $process */ @@ -390,7 +390,7 @@ private function runParallelProcess( } /** - * @param array $contentIds + * @param int[] $contentIds * * @throws \Ibexa\Contracts\Core\Repository\Exceptions\InvalidArgumentException */ @@ -478,20 +478,6 @@ public function getDeprecatedAliases(): array { return ['ezplatform:reindex']; } - - private function fetchIterationFromContentList(ContentList $contentList, int $iterationCount): Generator - { - $i = 1; - $contentIds = []; - foreach ($contentList as $content) { - $contentIds[] = $content->id; - if ($iterationCount <= $i) { - break; - } - ++$i; - } - yield $contentIds; - } } class_alias(ReindexCommand::class, 'eZ\Bundle\EzPublishCoreBundle\Command\ReindexCommand'); diff --git a/src/bundle/Core/Resources/config/commands.yml b/src/bundle/Core/Resources/config/commands.yml index ae0854c3dd..bd92d66a04 100644 --- a/src/bundle/Core/Resources/config/commands.yml +++ b/src/bundle/Core/Resources/config/commands.yml @@ -42,7 +42,7 @@ services: $env: '%kernel.environment%' $projectDir: '%kernel.project_dir%' $isDebug: '%kernel.debug%' - $contentService: '@Ibexa\Contracts\Core\Repository\ContentService' + $contentIdListGeneratorStrategy: '@Ibexa\Bundle\Core\Command\Indexer\ContentIdList\ContentTypeInputGeneratorStrategy' tags: - { name: console.command } @@ -65,3 +65,7 @@ services: Ibexa\Bundle\Core\Command\ExpireUserPasswordsCommand: autowire: true autoconfigure: true + + # Dedicated services for commands + Ibexa\Bundle\Core\Command\Indexer\ContentIdList\ContentTypeInputGeneratorStrategy: + autowire: true diff --git a/src/contracts/Repository/Values/Content/ContentList.php b/src/contracts/Repository/Values/Content/ContentList.php index ac69f85fda..ff5911b1d5 100644 --- a/src/contracts/Repository/Values/Content/ContentList.php +++ b/src/contracts/Repository/Values/Content/ContentList.php @@ -11,7 +11,6 @@ use ArrayIterator; use Ibexa\Contracts\Core\Repository\Collections\TotalCountAwareInterface; use IteratorAggregate; -use Traversable; /** * A filtered Content items list iterator. @@ -19,13 +18,15 @@ final class ContentList implements IteratorAggregate, TotalCountAwareInterface { /** @var int */ - private $totalCount; + private int $totalCount; /** @var \Ibexa\Contracts\Core\Repository\Values\Content\Content[] */ - private $contentItems; + private array $contentItems; /** * @internal for internal use by Repository + * + * @param array<\Ibexa\Contracts\Core\Repository\Values\Content\Content> $contentItems */ public function __construct(int $totalCount, array $contentItems) { @@ -39,9 +40,9 @@ public function getTotalCount(): int } /** - * @return \Ibexa\Contracts\Core\Repository\Values\Content\Content[]|\Traversable + * @return \ArrayIterator */ - public function getIterator(): Traversable + public function getIterator(): ArrayIterator { return new ArrayIterator($this->contentItems); } diff --git a/src/lib/Search/Indexer/ContentIdBatchList.php b/src/lib/Search/Indexer/ContentIdBatchList.php new file mode 100644 index 0000000000..ae8d410594 --- /dev/null +++ b/src/lib/Search/Indexer/ContentIdBatchList.php @@ -0,0 +1,49 @@ +> + */ +final class ContentIdBatchList implements IteratorAggregate +{ + /** @var iterable> */ + private iterable $list; + + private int $totalCount; + + /** + * @param iterable> $list + */ + public function __construct(iterable $list, int $totalCount) + { + $this->list = $list; + $this->totalCount = $totalCount; + } + + /** + * return \Traversable>. + */ + public function getIterator(): Traversable + { + yield from $this->list; + } + + public function getCount(): int + { + return $this->totalCount; + } +} diff --git a/tests/bundle/Core/Command/Indexer/ContentIdList/ContentTypeInputGeneratorStrategyTest.php b/tests/bundle/Core/Command/Indexer/ContentIdList/ContentTypeInputGeneratorStrategyTest.php new file mode 100644 index 0000000000..573243ba1d --- /dev/null +++ b/tests/bundle/Core/Command/Indexer/ContentIdList/ContentTypeInputGeneratorStrategyTest.php @@ -0,0 +1,115 @@ + $expectedBatches + */ + public function testGetGenerator(ContentList $contentList, int $batchSize, array $expectedBatches): void + { + $contentServiceMock = $this->createMock(ContentService::class); + $contentServiceMock->method('find')->willReturn($contentList); + + $inputMock = $this->createMock(InputInterface::class); + $inputMock->method('getOption')->with('content-type')->willReturn(uniqid('type', true)); + + $strategy = new ContentTypeInputGeneratorStrategy($contentServiceMock); + + self::assertSame( + $expectedBatches, + iterator_to_array($strategy->getBatchList($inputMock, $batchSize)) + ); + } + + /** + * @return iterable}> + */ + public function getDataForTestGetGenerator(): iterable + { + yield 'iteration count = 3, items = 10' => [ + $this->generateContentList(10), + 3, + [ + [1, 2, 3], + [4, 5, 6], + [7, 8, 9], + [10], + ], + ]; + + yield 'iteration count = 6, items = 6' => [ + $this->generateContentList(6), + 6, + [ + [1, 2, 3, 4, 5, 6], + ], + ]; + + yield 'iteration count = 2, items = 4' => [ + $this->generateContentList(4), + 2, + [ + [1, 2], + [3, 4], + ], + ]; + + yield 'iteration count = 10, items = 5' => [ + $this->generateContentList(5), + 10, + [ + [1, 2, 3, 4, 5], + ], + ]; + + yield 'iteration count = 5, items = 0' => [ + $this->generateContentList(0), + 5, + [], + ]; + } + + private function generateContentList(int $totalCount): ContentList + { + $contentItems = []; + for ($i = 0; $i < $totalCount; ++$i) { + $contentItems[] = $this->createContentItemWithIdMock($i + 1); + } + + return new ContentList($totalCount, $contentItems); + } + + private function createContentItemWithIdMock(int $id): Content + { + $contentItem = $this->createMock(Content::class); + $contentInfoMock = $this->createMock(ContentInfo::class); + $contentInfoMock->method('getId')->willReturn($id); + $versionInfoMock = $this->createMock(VersionInfo::class); + $versionInfoMock->method('getContentInfo')->willReturn($contentInfoMock); + $contentItem->method('getVersionInfo')->willReturn($versionInfoMock); + + return $contentItem; + } +} diff --git a/tests/integration/Core/Repository/Filtering/ContentFilteringTest.php b/tests/integration/Core/Repository/Filtering/ContentFilteringTest.php index 86720bdf58..5a26a80163 100644 --- a/tests/integration/Core/Repository/Filtering/ContentFilteringTest.php +++ b/tests/integration/Core/Repository/Filtering/ContentFilteringTest.php @@ -112,7 +112,9 @@ protected function findUsingContentSearch(Query $query): ContentList return new ContentList( $searchResults->totalCount, array_map( - static function (SearchHit $searchHit) { + static function (SearchHit $searchHit): Content { + self::assertInstanceOf(Content::class, $searchHit->valueObject); + return $searchHit->valueObject; }, $searchResults->searchHits @@ -332,7 +334,9 @@ private function performAndAssertSimpleSortClauseQuery(FilteringSortClause $sort $filter = new Filter(new Criterion\ContentId(57), [$sortClause]); $contentList = $this->find($filter, []); self::assertCount(1, $contentList); - self::assertSame(57, $contentList->getIterator()[0]->id); + $contentItem = $contentList->getIterator()[0]; + self::assertNotNull($contentItem); + self::assertSame(57, $contentItem->id); } /** diff --git a/tests/lib/Search/Indexer/ContentIdBatchListTest.php b/tests/lib/Search/Indexer/ContentIdBatchListTest.php new file mode 100644 index 0000000000..1a4a2d6887 --- /dev/null +++ b/tests/lib/Search/Indexer/ContentIdBatchListTest.php @@ -0,0 +1,120 @@ +>, int, array>}> + */ + public function getDataForTestGetIterator(): iterable + { + yield 'generator' => [ + $this->buildGenerator(), + 5, + [ + [1, 2, 3], + [4, 5], + ], + ]; + + yield 'array' => [ + [ + [1, 2], + [3, 4], + [5], + ], + 5, + [ + [1, 2], + [3, 4], + [5], + ], + ]; + + yield 'Traversable object' => [ + $this->buildTraversableObject(), + 5, + [ + [1, 2, 3, 4], + [5], + ], + ]; + + yield 'empty generator' => [ + $this->buildEmptyGenerator(), + 0, + [], + ]; + } + + /** + * @dataProvider getDataForTestGetIterator + * + * @param iterable> $list + * @param array> $expectedBatches + */ + public function testGetIterator(iterable $list, int $totalCount, array $expectedBatches): void + { + $contentIdBatchList = new ContentIdBatchList($list, $totalCount); + $unpackedActualBatches = []; + foreach ($contentIdBatchList as $index => $items) { + $unpackedActualBatches[$index] = $items; + } + self::assertSame($expectedBatches, $unpackedActualBatches); + } + + public function testGetCount(): void + { + $contentIdBatchList = new ContentIdBatchList([[1, 2, 3]], 3); + self::assertSame(3, $contentIdBatchList->getCount()); + } + + private function buildGenerator(): Generator + { + yield [1, 2, 3]; + yield [4, 5]; + } + + private function buildEmptyGenerator(): \Generator + { + yield from []; + } + + /** + * @return \Traversable> + */ + private function buildTraversableObject(): Traversable + { + return new class() implements IteratorAggregate { + /** + * @return \ArrayIterator + */ + public function getIterator(): ArrayIterator + { + return new ArrayIterator( + [ + [1, 2, 3, 4], + [5], + ] + ); + } + }; + } +}