Index: tests/phpunit/includes/GlobalFunctions/GlobalTest.php =================================================================== --- tests/phpunit/includes/GlobalFunctions/GlobalTest.php (revision 101480) +++ tests/phpunit/includes/GlobalFunctions/GlobalTest.php (working copy) @@ -831,42 +831,42 @@ } /** - * @dataProvider provideMakeUrlIndex() + * @dataProvider provideMakeUrlIndexes() */ - function testMakeUrlIndex( $url, $expected ) { - $index = wfMakeUrlIndex( $url ); - $this->assertEquals( $expected, $index, "wfMakeUrlIndex(\"$url\")" ); + function testMakeUrlIndexes( $url, $expected ) { + $index = wfMakeUrlIndexes( $url ); + $this->assertEquals( $expected, $index, "wfMakeUrlIndexes(\"$url\")" ); } - function provideMakeUrlIndex() { + function provideMakeUrlIndexes() { return array( array( // just a regular :) 'https://bugzilla.wikimedia.org/show_bug.cgi?id=28627', - 'https://org.wikimedia.bugzilla./show_bug.cgi?id=28627' + array( 'https://org.wikimedia.bugzilla./show_bug.cgi?id=28627' ) ), array( // mailtos are handled special // is this really right though? that final . probably belongs earlier? 'mailto:wiki@wikimedia.org', - 'mailto:org.wikimedia@wiki.', + array( 'mailto:org.wikimedia@wiki.' ) ), // file URL cases per bug 28627... array( // three slashes: local filesystem path Unix-style 'file:///whatever/you/like.txt', - 'file://./whatever/you/like.txt' + array( 'file://./whatever/you/like.txt' ) ), array( // three slashes: local filesystem path Windows-style 'file:///c:/whatever/you/like.txt', - 'file://./c:/whatever/you/like.txt' + array( 'file://./c:/whatever/you/like.txt' ) ), array( // two slashes: UNC filesystem path Windows-style 'file://intranet/whatever/you/like.txt', - 'file://intranet./whatever/you/like.txt' + array( 'file://intranet./whatever/you/like.txt' ) ), // Multiple-slash cases that can sorta work on Mozilla // if you hack it just right are kinda pathological, @@ -875,6 +875,15 @@ // // Those will survive the algorithm but with results that // are less consistent. + + // protocol-relative URL cases per bug 29854... + array( + '//bugzilla.wikimedia.org/show_bug.cgi?id=28627', + array( + 'http://org.wikimedia.bugzilla./show_bug.cgi?id=28627', + 'https://org.wikimedia.bugzilla./show_bug.cgi?id=28627' + ) + ), ); } Index: includes/GlobalFunctions.php =================================================================== --- includes/GlobalFunctions.php (revision 101480) +++ includes/GlobalFunctions.php (working copy) @@ -605,12 +605,12 @@ } /** - * Make a URL index, appropriate for the el_index field of externallinks. + * Make URL indexes, appropriate for the el_index field of externallinks. * * @param $url String - * @return String + * @return array */ -function wfMakeUrlIndex( $url ) { +function wfMakeUrlIndexes( $url ) { $bits = wfParseUrl( $url ); // Reverse the labels in the hostname, convert to lower case @@ -650,7 +650,12 @@ if ( isset( $bits['fragment'] ) ) { $index .= '#' . $bits['fragment']; } - return $index; + + if ( $prot == '' ) { + return array( "http:$index", "https:$index" ); + } else { + return array( $index ); + } } /** Index: includes/installer/MysqlUpdater.php =================================================================== --- includes/installer/MysqlUpdater.php (revision 101480) +++ includes/installer/MysqlUpdater.php (working copy) @@ -189,7 +189,8 @@ array( 'doMigrateUserOptions' ), array( 'dropField', 'user', 'user_options', 'patch-drop-user_options.sql' ), array( 'addField', 'revision', 'rev_sha1', 'patch-rev_sha1.sql' ), - array( 'addField', 'archive', 'ar_sha1', 'patch-ar_sha1.sql' ) + array( 'addField', 'archive', 'ar_sha1', 'patch-ar_sha1.sql' ), + array( 'doExtLinksProtocolRelativeUpdate' ), ); } @@ -850,4 +851,56 @@ $this->applyPatch( 'patch-user-newtalk-timestamp-null.sql' ); $this->output( "done.\n" ); } + + protected function doExtLinksProtocolRelativeUpdate() { + $this->output( "Updating externallinks table to fix indexes for protocol-relative URLs...\n" ); + if ( wfGetLB()->getServerCount() > 1 ) { + // Slow, replication-friendly update + $res = $this->db->select( 'externallinks', array( 'el_from', 'el_to', 'el_index' ), + array( 'el_index' . $this->db->buildLike( '//', $this->db->anyString() ) ), __METHOD__ ); + $count = 0; + foreach ( $res as $row ) { + $count = ( $count + 1 ) % 100; + if ( $count == 0 ) { + wfWaitForSlaves(); + } + $this->db->insert( 'externallinks', + array( + 'el_from' => $row->el_from, + 'el_to' => $row->el_to, + 'el_index' => "http:$row->el_index", + ), __METHOD__ + ); + $this->db->update( 'externallinks', + array( + 'el_index' => "https:$row->el_index", + ), + array( + 'el_from' => $row->el_from, + 'el_to' => $row->el_to, + 'el_index' => $row->el_index, + ), __METHOD__ + ); + } + } else { + // Fast update + $this->db->insertSelect( 'externallinks', 'externallinks', + array( + 'el_from' => 'el_from', + 'el_to' => 'el_to', + 'el_index' => 'CONCAT(\'http:\', el_index)' + ), array( + 'el_index ' . $this->db->buildLike( '//', $this->db->anyString() ) + ), __METHOD__ + ); + $this->db->update( 'externallinks', + array( + 'el_index = CONCAT(\'https:\', el_index)' + ), array( + 'el_index ' . $this->db->buildLike( '//', $this->db->anyString() ) + ), __METHOD__ + ); + } + $this->output( "Done.\n" ); + } } Index: includes/LinksUpdate.php =================================================================== --- includes/LinksUpdate.php (revision 101480) +++ includes/LinksUpdate.php (working copy) @@ -456,11 +456,13 @@ $arr = array(); $diffs = array_diff_key( $this->mExternals, $existing ); foreach( $diffs as $url => $dummy ) { - $arr[] = array( - 'el_from' => $this->mId, - 'el_to' => $url, - 'el_index' => wfMakeUrlIndex( $url ), - ); + foreach( wfMakeUrlIndexes( $url ) as $index ) { + $arr[] = array( + 'el_from' => $this->mId, + 'el_to' => $url, + 'el_index' => $index, + ); + } } return $arr; } Index: includes/api/ApiQueryExternalLinks.php =================================================================== --- includes/api/ApiQueryExternalLinks.php (revision 101480) +++ includes/api/ApiQueryExternalLinks.php (working copy) @@ -69,6 +69,11 @@ $this->addOption( 'ORDER BY', 'el_from' ); } + // If we're querying all protocols, use DISTINCT to avoid repeating protocol-relative links twice + if ( $protocol === null ) { + $this->addOption( 'DISTINCT' ); + } + $this->addOption( 'LIMIT', $params['limit'] + 1 ); $offset = isset( $params['offset'] ) ? $params['offset'] : 0; if ( $offset ) {