From 8d9e60302eb9cdcfba681fb423b245ced68aff1f Mon Sep 17 00:00:00 2001 From: Lucas Werkmeister Date: Mon, 17 Jan 2022 20:06:42 +0100 Subject: [PATCH] SECURITY: Sort Special:WhatLinksHere by namespace MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ApiQueryBacklinks already sorts by the from_namespace column before the from column, but SpecialWhatLinksHere only sorted by the from column so far (i.e. the page ID of the linking page). This means that MySQL cannot always use the indexes on the backlinks and related tables efficiently, turning the special page into a potential DoS vector for certain parameter combinations (where it reads millions of rows and then sorts them in memory). Fix this by sorting by namespace first (except for the redirect table, which doesn’t have an rd_from column); this also means that we always want to add a filter for the namespaces, even when it’s just filtering for all the known namespaces. The URL changes format again, and now looks like &offset=0|123, where 0 is the namespace of page 123, and the results will be the pages in the same namespace with an ID above 123, or the pages in namespaces above 0 regardless of page ID (though still sorted). Old URLs are of course supported, and we look up the relevant namespace of the given page ID on-the-fly in that case. Bug: T297754 --- includes/specials/SpecialWhatLinksHere.php | 151 +++++++++++++++------ 1 file changed, 108 insertions(+), 43 deletions(-) diff --git a/includes/specials/SpecialWhatLinksHere.php b/includes/specials/SpecialWhatLinksHere.php index 4271ad76d0..0c6c19f26b 100644 --- a/includes/specials/SpecialWhatLinksHere.php +++ b/includes/specials/SpecialWhatLinksHere.php @@ -23,6 +23,7 @@ use MediaWiki\Cache\LinkBatchFactory; use MediaWiki\Content\IContentHandlerFactory; +use MediaWiki\MediaWikiServices; use Wikimedia\Rdbms\IDatabase; use Wikimedia\Rdbms\ILoadBalancer; use Wikimedia\Rdbms\SelectQueryBuilder; @@ -94,7 +95,7 @@ public function execute( $par ) { $opts->add( 'target', '' ); $opts->add( 'namespace', '', FormOptions::INTNULL ); $opts->add( 'limit', $this->getConfig()->get( 'QueryPageDefaultLimit' ) ); - $opts->add( 'offset', 0 ); + $opts->add( 'offset', '' ); $opts->add( 'from', 0 ); $opts->add( 'dir', 'next' ); $opts->add( 'hideredirs', false ); @@ -130,31 +131,74 @@ public function execute( $par ) { $out->setPageTitle( $this->msg( 'whatlinkshere-title', $this->target->getPrefixedText() ) ); $out->addBacklinkSubtitle( $this->target ); - // Workaround for legacy 'from' and 'back' system. - // We need only 'from' param to get offset. - $from = $opts->getValue( 'from' ); - $opts->reset( 'from' ); - $dir = $from ? 'next' : $opts->getValue( 'dir' ); - // 'from' was included in result set, offset is excluded. We need to align them. - $offset = $from ? $from - 1 : $opts->getValue( 'offset' ); + [ $offsetNamespace, $offsetPageID, $dir ] = $this->parseOffsetAndDir( $opts ); $this->showIndirectLinks( 0, $this->target, $opts->getValue( 'limit' ), - $offset, + $offsetNamespace, + $offsetPageID, $dir ); } + /** + * Parse the offset and direction parameters. + * + * Three parameter kinds are supported: + * * from=123 (legacy), where page ID 123 is the first included one + * * offset=123&dir=next/prev (legacy), where page ID 123 is the last excluded one + * * offset=0|123&dir=next/prev (current), where namespace 0 page ID 123 is the last excluded one + * + * @param FormOptions $opts + * @return array( int $offsetNamespace, int $offsetPageID, string $dir ) + */ + private function parseOffsetAndDir( FormOptions $opts ): array { + $from = $opts->getValue( 'from' ); + $opts->reset( 'from' ); + + if ( $from ) { + $dir = 'next'; + $offsetNamespace = null; + $offsetPageID = $from - 1; + } else { + $dir = $opts->getValue( 'dir' ); + [ $offsetNamespaceString, $offsetPageIDString ] = explode( + '|', + $opts->getValue( 'offset' ) . '|' + ); + if ( !$offsetPageIDString ) { + $offsetPageIDString = $offsetNamespaceString; + $offsetNamespaceString = ''; + } + if ( is_numeric( $offsetNamespaceString ) ) { + $offsetNamespace = (int)$offsetNamespaceString; + } else { + $offsetNamespace = null; + } + $offsetPageID = (int)$offsetPageIDString; + } + + if ( $offsetNamespace === null ) { + $offsetTitle = MediaWikiServices::getInstance() + ->getTitleFactory() + ->newFromID( $offsetPageID ); + $offsetNamespace = $offsetTitle ? $offsetTitle->getNamespace() : 0; + } + + return [ $offsetNamespace, $offsetPageID, $dir ]; + } + /** * @param int $level Recursion level * @param Title $target Target title * @param int $limit Number of entries to display - * @param int $offset Display from this article ID (excluded) + * @param int $offsetNamespace Display from this namespace number (included) + * @param int $offsetPageID Display from this article ID (excluded) * @param string $dir 'next' or 'prev' */ - private function showIndirectLinks( $level, $target, $limit, $offset = 0, $dir = 'next' ) { + private function showIndirectLinks( $level, $target, $limit, $offsetNamespace = 0, $offsetPageID = 0, $dir = 'next' ) { $out = $this->getOutput(); $dbr = $this->loadBalancer->getConnectionRef( ILoadBalancer::DB_REPLICA ); @@ -196,18 +240,25 @@ private function showIndirectLinks( $level, $target, $limit, $offset = 0, $dir = } else { $namespaces = $namespace; } - $conds['redirect']['page_namespace'] = $namespaces; - $conds['pagelinks']['pl_from_namespace'] = $namespaces; - $conds['templatelinks']['tl_from_namespace'] = $namespaces; - $conds['imagelinks']['il_from_namespace'] = $namespaces; + } else { + // Select all namespaces. + // This allows the database to use the *_from_namespace index. (T297754) + $namespaces = $this->namespaceInfo->getValidNamespaces(); } + $conds['redirect']['page_namespace'] = $namespaces; + $conds['pagelinks']['pl_from_namespace'] = $namespaces; + $conds['templatelinks']['tl_from_namespace'] = $namespaces; + $conds['imagelinks']['il_from_namespace'] = $namespaces; - if ( $offset ) { + if ( $offsetPageID ) { $rel = $dir === 'prev' ? '<' : '>'; - $conds['redirect'][] = "rd_from $rel $offset"; - $conds['templatelinks'][] = "tl_from $rel $offset"; - $conds['pagelinks'][] = "pl_from $rel $offset"; - $conds['imagelinks'][] = "il_from $rel $offset"; + $conds['redirect'][] = "rd_from $rel $offsetPageID"; + $conds['templatelinks'][] = "(tl_from_namespace = $offsetNamespace AND tl_from $rel $offsetPageID " . + "OR tl_from_namespace $rel $offsetNamespace)"; + $conds['pagelinks'][] = "(pl_from_namespace = $offsetNamespace AND pl_from $rel $offsetPageID " . + "OR pl_from_namespace $rel $offsetNamespace)"; + $conds['imagelinks'][] = "(il_from_namespace = $offsetNamespace AND il_from $rel $offsetPageID " . + "OR il_from_namespace $rel $offsetNamespace)"; } if ( $hideredirs ) { @@ -236,7 +287,7 @@ private function showIndirectLinks( $level, $target, $limit, $offset = 0, $dir = ->table( $table ) ->fields( [ $fromCol, 'rd_from', 'rd_fragment' ] ) ->conds( $conds[$table] ) - ->orderBy( $fromCol, $sortDirection ) + ->orderBy( [ $fromCol . '_namespace', $fromCol ], $sortDirection ) ->limit( 2 * $queryLimit ) ->leftJoin( 'redirect', 'redirect', $on ); @@ -245,7 +296,7 @@ private function showIndirectLinks( $level, $target, $limit, $offset = 0, $dir = ->join( 'page', 'page', "$fromCol = page_id" ) ->fields( [ 'page_id', 'page_namespace', 'page_title', 'rd_from', 'rd_fragment', 'page_is_redirect' ] ) - ->orderBy( 'page_id', $sortDirection ) + ->orderBy( [ 'page_namespace', 'page_id' ], $sortDirection ) ->limit( $queryLimit ) ->caller( $fname ) ->fetchResultSet(); @@ -339,40 +390,54 @@ private function showIndirectLinks( $level, $target, $limit, $offset = 0, $dir = } } - // Sort by key and then change the keys to 0-based indices - ksort( $rows ); - $rows = array_values( $rows ); + // Sort by namespace + page ID, changing the keys to 0-based indices + usort( $rows, static function ( $rowA, $rowB ) { + if ( $rowA->page_namespace !== $rowB->page_namespace ) { + return $rowA->page_namespace < $rowB->page_namespace ? -1 : 1; + } + if ( $rowA->page_id !== $rowB->page_id ) { + return $rowA->page_id < $rowB->page_id ? - 1 : 1; + } + return 0; + } ); $numRows = count( $rows ); // Work out the start and end IDs, for prev/next links if ( !$limit ) { // T289351 - $nextId = $prevId = false; + $nextNamespace = $nextPageId = $prevNamespace = $prevPageId = false; $rows = []; } elseif ( $dir === 'prev' ) { if ( $numRows > $limit ) { // More rows available after these ones - // Get the nextId from the last row in the result set - $nextId = $rows[$limit]->page_id; + // Get the next row from the last row in the result set + $nextNamespace = $rows[$limit]->page_namespace; + $nextPageId = $rows[$limit]->page_id; // Remove undisplayed rows, for dir='prev' we need to discard first record after sorting $rows = array_slice( $rows, 1, $limit ); - // Get the prevId from the first displayed row - $prevId = $rows[0]->page_id; + // Get the prev row from the first displayed row + $prevNamespace = $rows[0]->page_namespace; + $prevPageId = $rows[0]->page_id; } else { - // Get the nextId from the last displayed row - $nextId = $rows[$numRows - 1]->page_id; - $prevId = false; + // Get the next row from the last displayed row + $nextNamespace = $rows[$numRows - 1]->page_namespace; + $nextPageId = $rows[$numRows - 1]->page_id; + $prevNamespace = false; + $prevPageId = false; } } else { // If offset is not set disable prev link - $prevId = $offset ? $rows[0]->page_id : false; + $prevNamespace = $offsetPageID ? $rows[0]->page_namespace : false; + $prevPageId = $offsetPageID ? $rows[0]->page_id : false; if ( $numRows > $limit ) { - // Get the nextId from the last displayed row - $nextId = $rows[$limit - 1]->page_id; + // Get the next row from the last displayed row + $nextNamespace = $rows[$limit - 1]->page_namespace; + $nextPageId = $rows[$limit - 1]->page_id; // Remove undisplayed rows $rows = array_slice( $rows, 0, $limit ); } else { - $nextId = false; + $nextNamespace = false; + $nextPageId = false; } } @@ -403,7 +468,7 @@ private function showIndirectLinks( $level, $target, $limit, $offset = 0, $dir = $out->addWikiMsg( 'whatlinkshere-count', Message::numParam( count( $rows ) ) ); - $prevnext = $this->getPrevNext( $prevId, $nextId ); + $prevnext = $this->getPrevNext( $prevNamespace, $prevPageId, $nextNamespace, $nextPageId ); $out->addHTML( $prevnext ); } $out->addHTML( $this->listStart( $level ) ); @@ -543,7 +608,7 @@ private function makeSelfLink( $text, $query ) { ); } - private function getPrevNext( $prevId, $nextId ) { + private function getPrevNext( $prevNamespace, $prevPageId, $nextNamespace, $nextPageId ) { $currentLimit = $this->opts->getValue( 'limit' ); $prev = $this->msg( 'whatlinkshere-prev' )->numParams( $currentLimit )->text(); $next = $this->msg( 'whatlinkshere-next' )->numParams( $currentLimit )->text(); @@ -551,12 +616,12 @@ private function getPrevNext( $prevId, $nextId ) { $changed = $this->opts->getChangedValues(); unset( $changed['target'] ); // Already in the request title - if ( $prevId != 0 ) { - $overrides = [ 'dir' => 'prev', 'offset' => $prevId, ]; + if ( $prevPageId != 0 ) { + $overrides = [ 'dir' => 'prev', 'offset' => "$prevNamespace|$prevPageId", ]; $prev = Message::rawParam( $this->makeSelfLink( $prev, array_merge( $changed, $overrides ) ) ); } - if ( $nextId != 0 ) { - $overrides = [ 'dir' => 'next', 'offset' => $nextId, ]; + if ( $nextPageId != 0 ) { + $overrides = [ 'dir' => 'next', 'offset' => "$nextNamespace|$nextPageId", ]; $next = Message::rawParam( $this->makeSelfLink( $next, array_merge( $changed, $overrides ) ) ); } -- 2.32.0