From 3788acd4b95c91551aa6551689af0c547128fd7d Mon Sep 17 00:00:00 2001 From: Itamar Givon Date: Fri, 20 Nov 2020 13:22:17 +0100 Subject: [PATCH] SECURITY: DNM: Add onArticleDelete hook handler to EntityDataPurger MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As a followup to the introduction of EntityDataPurger, this hook handler ensures that all cached urls of a deleted entity and its revisions are purged after page deletion. [This version of the patch is not meant to be widely deployed – it adds a bunch of debug logging to help figure out why this didn’t seem to work in production. It should be fine to sync to a debug server, or even, if absolutely necessary, to all servers, but it probably shouldn’t end up in /srv/patches. T260349#6636794 has the non-debug version.] Bug: T260349 Change-Id: Ic270b2814e59c73b964c60533082eb73d46d72c3 --- extension-repo.json | 5 +- repo/includes/Hooks/EntityDataPurger.php | 138 ++++++++++- .../includes/Hooks/EntityDataPurgerTest.php | 225 +++++++++++++----- 3 files changed, 302 insertions(+), 66 deletions(-) diff --git a/extension-repo.json b/extension-repo.json index 32edde2886..b7a6432355 100644 --- a/extension-repo.json +++ b/extension-repo.json @@ -479,7 +479,9 @@ "class": "\\Wikibase\\Repo\\Hooks\\EntityDataPurger", "factory": "\\Wikibase\\Repo\\Hooks\\EntityDataPurger::factory", "services": [ - "HtmlCacheUpdater" + "HtmlCacheUpdater", + "DBLoadBalancerFactory", + "MainConfig" ] }, "FederatedPropertiesSpecialPage": { @@ -530,6 +532,7 @@ "ApiQuery::moduleManager": "\\Wikibase\\Repo\\RepoHooks::onApiQueryModuleManager", "ArticleDeleteComplete": [ "DeleteDispatcher", + "EntityDataPurger", "\\Wikibase\\Repo\\RepoHooks::onArticleDeleteComplete" ], "ArticleRevisionVisibilitySet": [ diff --git a/repo/includes/Hooks/EntityDataPurger.php b/repo/includes/Hooks/EntityDataPurger.php index aaa63ed94a..612cc697ab 100644 --- a/repo/includes/Hooks/EntityDataPurger.php +++ b/repo/includes/Hooks/EntityDataPurger.php @@ -4,17 +4,22 @@ namespace Wikibase\Repo\Hooks; +use BatchRowIterator; +use Config; use HtmlCacheUpdater; use MediaWiki\Hook\ArticleRevisionVisibilitySetHook; +use MediaWiki\Logger\LoggerFactory; +use MediaWiki\Page\Hook\ArticleDeleteCompleteHook; use Title; use Wikibase\Lib\Store\EntityIdLookup; use Wikibase\Repo\LinkedData\EntityDataUriManager; use Wikibase\Repo\WikibaseRepo; +use Wikimedia\Rdbms\ILBFactory; /** * @license GPL-2.0-or-later */ -class EntityDataPurger implements ArticleRevisionVisibilitySetHook { +class EntityDataPurger implements ArticleRevisionVisibilitySetHook, ArticleDeleteCompleteHook { /** @var EntityIdLookup */ private $entityIdLookup; @@ -24,31 +29,49 @@ class EntityDataPurger implements ArticleRevisionVisibilitySetHook { /** @var HtmlCacheUpdater */ private $htmlCacheUpdater; + /** + * @var ILBFactory + */ + private $loadBalancerFactory; + /** + * @var int + */ + private $batchSize; public function __construct( EntityIdLookup $entityIdLookup, EntityDataUriManager $entityDataUriManager, - HtmlCacheUpdater $htmlCacheUpdater + HtmlCacheUpdater $htmlCacheUpdater, + ILBFactory $loadBalancerFactory, + int $batchSize ) { $this->entityIdLookup = $entityIdLookup; $this->entityDataUriManager = $entityDataUriManager; $this->htmlCacheUpdater = $htmlCacheUpdater; + $this->loadBalancerFactory = $loadBalancerFactory; + $this->batchSize = $batchSize; } - public static function factory( HtmlCacheUpdater $htmlCacheUpdater ): self { + public static function factory( + HtmlCacheUpdater $htmlCacheUpdater, + ILBFactory $loadBalancerFactory, + Config $config + ): self { $wikibaseRepo = WikibaseRepo::getDefaultInstance(); return new self( $wikibaseRepo->getEntityIdLookup(), $wikibaseRepo->getEntityDataUriManager(), - $htmlCacheUpdater + $htmlCacheUpdater, + $loadBalancerFactory, + $config->get( 'UpdateRowsPerQuery' ) ); } /** - * @param Title $title - * @param int[] $ids - * @param int[][] $visibilityChangeMap - */ + * @param Title $title + * @param int[] $ids + * @param int[][] $visibilityChangeMap + */ public function onArticleRevisionVisibilitySet( $title, $ids, $visibilityChangeMap ): void { $entityId = $this->entityIdLookup->getEntityIdForTitle( $title ); if ( !$entityId ) { @@ -68,4 +91,103 @@ public function onArticleRevisionVisibilitySet( $title, $ids, $visibilityChangeM } } + /** + * @param \WikiPage $wikiPage + * @param \User $user + * @param string $reason + * @param int $id + * @param \Content|null $content + * @param \ManualLogEntry $logEntry + * @param int $archivedRevisionCount + * @return bool|void + */ + public function onArticleDeleteComplete( + $wikiPage, + $user, + $reason, + $id, + $content, + $logEntry, + $archivedRevisionCount + ) { + $logger = LoggerFactory::getInstance( 'Wikibase' ); + $logger->debug( __METHOD__ . ': entry for page {pageId}', [ + 'wikiPage' => $wikiPage, + 'pageId' => $id, + 'archivedRevisionCount' => $archivedRevisionCount, + ] ); + $title = $wikiPage->getTitle(); + $entityId = $this->entityIdLookup->getEntityIdForTitle( $title ); + if ( !$entityId ) { + $logger->debug( __METHOD__ . ': exit, no entity ID for title {title}', [ + 'title' => $title->getPrefixedText(), + ] ); + return; + } + $logger->debug( __METHOD__ . ': entity ID is {entityId}', [ + 'entityId' => $entityId->getSerialization(), + ] ); + + // This ensures we would delete also EntityData for non revision based urls of an entity. + $urls = $this->entityDataUriManager->getPotentiallyCachedUrls( $entityId ); + + foreach ( $this->getArchivedRevisionIds( $title, $id, $logger ) as $revisionId ) { + $addedUrls = $this->entityDataUriManager->getPotentiallyCachedUrls( + $entityId, + (int)$revisionId + ); + $logger->debug( __METHOD__ . ': add {count} URLs for revision {revision}', [ + 'revision' => $revisionId, + 'urls' => $addedUrls, + 'count' => count( $addedUrls ), + ] ); + $urls = array_merge( $urls, $addedUrls ); + } + + if ( $urls !== [] ) { + $logger->debug( __METHOD__ . ': purge {count} URLs', [ + 'urls' => $urls, + 'count' => count( $urls ), + ] ); + $this->htmlCacheUpdater->purgeUrls( $urls ); + } else { + $logger->debug( __METHOD__ . ': no URLs to purge' ); + } + } + + private function getArchivedRevisionIds( Title $title, int $id, $logger ): iterable { + $dbr = $this->loadBalancerFactory + ->getMainLB() + ->getConnectionRef( DB_REPLICA ); + + $iterator = new BatchRowIterator( + $dbr, + 'archive', + [ 'ar_id' ], + $this->batchSize + ); + $logger->debug( __METHOD__ . ': created batch row iterator for batch size {batchSize}', [ + 'batchSize' => $this->batchSize, + ] ); + + $iterator->setFetchColumns( [ 'ar_rev_id' ] ); + $iterator->addConditions( [ + 'ar_page_id' => $id, + 'ar_title' => $title->getDBkey(), + 'ar_namespace' => $title->getNamespace() + ] ); + + foreach ( $iterator as $batch ) { + $logger->debug( __METHOD__ . ': got batch of {count} rows', [ + 'batch' => $batch, + 'count' => count( $batch ), + ] ); + yield from array_map( + function ( $row ) { + return $row->ar_rev_id; + }, + $batch + ); + } + } } diff --git a/repo/tests/phpunit/includes/Hooks/EntityDataPurgerTest.php b/repo/tests/phpunit/includes/Hooks/EntityDataPurgerTest.php index a58766c619..b5337aa729 100644 --- a/repo/tests/phpunit/includes/Hooks/EntityDataPurgerTest.php +++ b/repo/tests/phpunit/includes/Hooks/EntityDataPurgerTest.php @@ -10,8 +10,12 @@ use Wikibase\DataModel\Entity\ItemId; use Wikibase\Lib\Store\EntityIdLookup; use Wikibase\Repo\Hooks\EntityDataPurger; -use Wikibase\Repo\LinkedData\EntityDataRequestHandler; use Wikibase\Repo\LinkedData\EntityDataUriManager; +use Wikimedia\Rdbms\FakeResultWrapper; +use Wikimedia\Rdbms\IDatabase; +use Wikimedia\Rdbms\ILBFactory; +use Wikimedia\Rdbms\ILoadBalancer; +use WikiPage; /** * @covers \Wikibase\Repo\Hooks\EntityDataPurger @@ -21,78 +25,185 @@ * @license GPL-2.0-or-later */ class EntityDataPurgerTest extends TestCase { + private const UNUSED_ARTICLE_DELETE_ARGS = [ null, '', 0, null, null, 0 ]; - public function testGivenEntityIdLookupReturnsNull_handlerDoesNothing() { - $title = Title::newFromText( 'Project:About' ); + private function getEntityIdLookup( Title $title, ?ItemId $entityId ): EntityIdLookup { $entityIdLookup = $this->createMock( EntityIdLookup::class ); $entityIdLookup->expects( $this->once() ) ->method( 'getEntityIdForTitle' ) ->with( $title ) - ->willReturn( null ); - $entityDataUriManager = $this->createMock( EntityDataUriManager::class ); - $entityDataUriManager->expects( $this->never() ) - ->method( 'getPotentiallyCachedUrls' ); - $htmlCacheUpdater = $this->createMock( HtmlCacheUpdater::class ); - $htmlCacheUpdater->expects( $this->never() ) - ->method( 'purgeUrls' ); - $purger = new EntityDataPurger( $entityIdLookup, $entityDataUriManager, $htmlCacheUpdater ); + ->willReturn( $entityId ); - $purger->onArticleRevisionVisibilitySet( $title, [ 1, 2, 3 ], [] ); + return $entityIdLookup; } - public function testGivenEntityIdLookupReturnsId_handlerPurgesCache() { - $title = Title::newFromText( 'Item:Q1' ); - $entityId = new ItemId( 'Q1' ); - $entityIdLookup = $this->createMock( EntityIdLookup::class ); - $entityIdLookup->expects( $this->once() ) - ->method( 'getEntityIdForTitle' ) - ->with( $title ) - ->willReturn( $entityId ); + private function getEntityDataUriManager( + array $idsToTest, + ?ItemId $entityId, + bool $baseItemUrl = false + ): EntityDataUriManager { $entityDataUriManager = $this->createMock( EntityDataUriManager::class ); - $entityDataUriManager->expects( $this->once() ) + + if ( $entityId === null ) { + $entityDataUriManager->expects( $this->never() ) + ->method( 'getPotentiallyCachedUrls' ); + return $entityDataUriManager; + } + + $calls = count( $idsToTest ); + $QID = $entityId->getSerialization(); + + $arguments = array_map( function ( $revisionId ) use ( $entityId ) { + return [ $entityId, $revisionId ]; + }, $idsToTest ); + + $returns = array_map( function ( $revisionId ) use ( $QID ) { + $suffix = $QID . '/' . $revisionId; + // returns [ 'urlA/Q1/1', 'urlB/Q1/1' ] + return [ + 'urlA/' . $suffix, + 'urlB/' . $suffix + ]; + }, $idsToTest ); + + if ( $baseItemUrl ) { + array_unshift( $arguments, [ $entityId ] ); + array_unshift( $returns, [ + 'urlA/' . $QID, + 'urlB/' . $QID, + ] ); + $calls += 1; + } + + $entityDataUriManager->expects( $this->exactly( $calls ) ) ->method( 'getPotentiallyCachedUrls' ) - ->with( $entityId, 1 ) - ->willReturn( [ 'urlA/Q1/1', 'urlB/Q1/1' ] ); - $htmlCacheUpdater = $this->createMock( HtmlCacheUpdater::class ); - $htmlCacheUpdater->expects( $this->once() ) - ->method( 'purgeUrls' ) - ->with( [ 'urlA/Q1/1', 'urlB/Q1/1' ] ); - $purger = new EntityDataPurger( $entityIdLookup, $entityDataUriManager, $htmlCacheUpdater ); + ->withConsecutive( ...$arguments ) + ->willReturnOnConsecutiveCalls( ...$returns ); - $purger->onArticleRevisionVisibilitySet( $title, [ 1 ], [] ); + return $entityDataUriManager; } - public function testGivenMultipleRevisions_handlerPurgesCacheOnce() { - $title = Title::newFromText( 'Item:Q1' ); - $entityId = new ItemId( 'Q1' ); - $entityIdLookup = $this->createMock( EntityIdLookup::class ); - $entityIdLookup->expects( $this->once() ) - ->method( 'getEntityIdForTitle' ) - ->with( $title ) - ->willReturn( $entityId ); - $entityDataUriManager = $this->createMock( EntityDataUriManager::class ); - $entityDataUriManager - ->method( 'getPotentiallyCachedUrls' ) - ->withConsecutive( - [ $entityId, 1 ], - [ $entityId, 2 ], - [ $entityId, 3 ] - ) - ->willReturnOnConsecutiveCalls( - [ 'urlA/Q1/1', 'urlB/Q1/1' ], - [ 'urlA/Q1/2', 'urlB/Q1/2' ], - [ 'urlA/Q1/3', 'urlB/Q1/3' ] - ); + private function getHtmlCacheUpdater( + array $idsToTest, + ?ItemId $entityId, + bool $baseItemUrl = false + ): HtmlCacheUpdater { $htmlCacheUpdater = $this->createMock( HtmlCacheUpdater::class ); + + if ( $entityId === null ) { + $htmlCacheUpdater->expects( $this->never() ) + ->method( 'purgeUrls' ); + return $htmlCacheUpdater; + } + + $QID = $entityId->getSerialization(); + + $urls = array_reduce( $idsToTest, function ( $urls, $revisionId ) use ( $QID ) { + $suffix = $QID . '/' . $revisionId; + + return array_merge( $urls, [ + 'urlA/' . $suffix, + 'urlB/' . $suffix + ] ); + }, [] ); + + if ( $baseItemUrl ) { + array_unshift( $urls, 'urlA/' . $QID, 'urlB/' . $QID ); + } + $htmlCacheUpdater->expects( $this->once() ) ->method( 'purgeUrls' ) - ->with( [ - 'urlA/Q1/1', 'urlB/Q1/1', - 'urlA/Q1/2', 'urlB/Q1/2', - 'urlA/Q1/3', 'urlB/Q1/3', - ] ); - $purger = new EntityDataPurger( $entityIdLookup, $entityDataUriManager, $htmlCacheUpdater ); + ->with( $urls ); + + return $htmlCacheUpdater; + } + + private function getLBFactory( array $returnIds, int $batchSize ): ILBFactory { + $mockDB = $this->createMock( IDatabase::class ); + $mockLB = $this->createMock( ILoadBalancer::class ); + $mockFactory = $this->createMock( ILBFactory::class ); + + $idBatches = array_chunk( $returnIds, $batchSize ); + $returnBatches = array_map( function ( $batch ) { + $rows = array_map( function ( $id ) { + return [ + "ar_id" => 0, + "ar_rev_id" => $id + ]; + }, $batch ); + + return new FakeResultWrapper( $rows ); + }, $idBatches ); + + // Add empty result to end batch iteration + array_push( $returnBatches, new FakeResultWrapper( [] ) ); + + $mockDB->method( 'select' ) + ->willReturnOnConsecutiveCalls( ...$returnBatches ); + + $mockLB->method( 'getConnectionRef' ) + ->willReturn( $mockDB ); + + $mockFactory->method( 'getMainLB' ) + ->willReturn( $mockLB ); + + return $mockFactory; + } + + private function getEntityDataPurger( + Title $title, + array $idsToTest, + int $batchSize = 5, + bool $failedLookup = false, + bool $withBaseUri = false + ): EntityDataPurger { + $entityId = $failedLookup ? null : new ItemId( 'Q1' ); + + return new EntityDataPurger( + $this->getEntityIdLookup( $title, $entityId ), + $this->getEntityDataUriManager( $idsToTest, $entityId, $withBaseUri ), + $this->getHtmlCacheUpdater( $idsToTest, $entityId, $withBaseUri ), + $this->getLBFactory( $idsToTest, $batchSize ), + $batchSize + ); + } + + public function revisionsProvider(): array { + // [ array $revisionIds, int $batchSize, bool $failedLookup ] + return [ + 'entity id lookup returns null' => [ [], 5, true ], + 'single revision id' => [ [ 1 ] ], + 'multiple revision ids' => [ [ 1, 2, 3 ] ], + 'batch size smaller than number of ids' => [ [ 1, 2, 3 ], 2 ] + ]; + } + + /** + * @dataProvider revisionsProvider + */ + public function testOnArticleRevisionVisibilitySet( + array $revisionIds, + int $batchSize = 5, + bool $failedLookup = false + ) { + $title = Title::newFromText( 'Test:Title' ); + $purger = $this->getEntityDataPurger( $title, $revisionIds, $batchSize, $failedLookup ); + $purger->onArticleRevisionVisibilitySet( $title, $revisionIds, [] ); + } - $purger->onArticleRevisionVisibilitySet( $title, [ 1, 2, 3 ], [] ); + /** + * @dataProvider revisionsProvider + */ + public function testOnArticleDeleteComplete( + array $revisionIds, + int $batchSize = 5, + bool $failedLookup = false + ) { + $title = Title::newFromText( 'Test:Title' ); + $page = $this->createMock( WikiPage::class ); + $page->method( 'getTitle' ) + ->willReturn( $title ); + $purger = $this->getEntityDataPurger( $title, $revisionIds, $batchSize, $failedLookup, true ); + $purger->onArticleDeleteComplete( $page, ...self::UNUSED_ARTICLE_DELETE_ARGS ); } } -- 2.27.0