Index: trunk/phase3/includes/Article.php =================================================================== --- trunk/phase3/includes/Article.php (revision 53416) +++ trunk/phase3/includes/Article.php (working copy) @@ -41,6 +41,7 @@ var $mUser = -1; //!< Not loaded var $mUserText = ''; //!< var $mParserOptions; //!< + var $mParserOutput; //!< Parser output of the viewed page; stays unset or false when no parse or parser cache hit happened /**@}}*/ /** @@ -772,7 +773,6 @@ } $wgOut->setArticleFlag( true ); - $wgOut->setRobotPolicy( $this->getRobotPolicyForView() ); # Set page title (may be overridden by DISPLAYTITLE) $wgOut->setPageTitle( $this->mTitle->getPrefixedText() ); @@ -804,108 +804,121 @@ $this->showNamespaceHeader(); $outputDone = false; - wfRunHooks( 'ArticleViewHeader', array( &$this, &$outputDone, &$useParserCache ) ); - # Try the parser cache - if( !$outputDone && $useParserCache ) { - $parserOutput = $parserCache->get( $this, $parserOptions ); - if ( $parserOutput !== false ) { - wfDebug( __METHOD__.": showing parser cache contents\n" ); - $wgOut->addParserOutput( $parserOutput ); - // Ensure that UI elements requiring revision ID have - // the correct version information. 
- $wgOut->setRevisionId( $this->mLatest ); - $outputDone = true; - } - } + # Iterate through the possible ways of constructing the output text + $pass = 0; + while( !$outputDone && ++$pass ){ + switch( $pass ){ - if ( $outputDone ) { - $this->showViewFooter(); - $this->viewUpdates(); - wfProfileOut( __METHOD__ ); - return; - } + case 1: + wfRunHooks( 'ArticleViewHeader', array( &$this, &$outputDone, &$useParserCache ) ); + break; - $text = $this->getContent(); - if( $text === false || $this->getID() == 0 ) { - wfDebug( __METHOD__.": showing missing article\n" ); - $this->showMissingArticle(); - wfProfileOut( __METHOD__ ); - return; - } + case 2: + # Try the parser cache + if( $useParserCache ) { + $this->mParserOutput = $parserCache->get( $this, $parserOptions ); + if ( $this->mParserOutput !== false ) { + wfDebug( __METHOD__.": showing parser cache contents\n" ); + $wgOut->addParserOutput( $this->mParserOutput ); + // Ensure that UI elements requiring revision ID have + // the correct version information. 
+ $wgOut->setRevisionId( $this->mLatest ); + $outputDone = true; + } + } + break; - # Another whitelist check in case oldid is altering the title - if( !$this->mTitle->userCanRead() ) { - wfDebug( __METHOD__.": denied on secondary read check\n" ); - $wgOut->loginToUse(); - $wgOut->output(); - $wgOut->disable(); - wfProfileOut( __METHOD__ ); - return; - } + case 3: + $text = $this->getContent(); + if( $text === false || $this->getID() == 0 ) { + wfDebug( __METHOD__.": showing missing article\n" ); + $this->showMissingArticle(); + wfProfileOut( __METHOD__ ); + return; + } - # We're looking at an old revision - if( $oldid && !is_null( $this->mRevision ) ) { - $this->setOldSubtitle( $oldid ); - if ( !$this->showDeletedRevisionHeader() ) { - wfDebug( __METHOD__.": cannot view deleted revision\n" ); - wfProfileOut( __METHOD__ ); - return; - } + # Another whitelist check in case oldid is altering the title + if( !$this->mTitle->userCanRead() ) { + wfDebug( __METHOD__.": denied on secondary read check\n" ); + $wgOut->loginToUse(); + $wgOut->output(); + $wgOut->disable(); + wfProfileOut( __METHOD__ ); + return; + } - if ( $oldid === $this->getLatest() && $this->useParserCache( false ) ) { - $parserOutput = $parserCache->get( $this, $parserOptions ); - if ( $parserOutput ) { - wfDebug( __METHOD__.": showing parser cache for current rev permalink\n" ); - $wgOut->addParserOutput( $parserOutput ); - $this->showViewFooter(); - $this->viewUpdates(); - wfProfileOut( __METHOD__ ); - return; - } - } - } + # We're looking at an old revision + if( $oldid && !is_null( $this->mRevision ) ) { + $this->setOldSubtitle( $oldid ); + if ( !$this->showDeletedRevisionHeader() ) { + wfDebug( __METHOD__.": cannot view deleted revision\n" ); + wfProfileOut( __METHOD__ ); + return; + } - // Ensure that UI elements requiring revision ID have - // the correct version information. 
- $wgOut->setRevisionId( $this->getRevIdFetched() ); + if ( $oldid === $this->getLatest() && $this->useParserCache( false ) ) { + $this->mParserOutput = $parserCache->get( $this, $parserOptions ); + if ( $this->mParserOutput ) { + wfDebug( __METHOD__.": showing parser cache for current rev permalink\n" ); + $wgOut->addParserOutput( $this->mParserOutput ); + $this->showViewFooter(); + $this->viewUpdates(); + wfProfileOut( __METHOD__ ); + return; + } + } + } - // Pages containing custom CSS or JavaScript get special treatment - if( $this->mTitle->isCssOrJsPage() || $this->mTitle->isCssJsSubpage() ) { - wfDebug( __METHOD__.": showing CSS/JS source\n" ); - $this->showCssOrJsPage(); - $outputDone = true; - } else if( $rt = Title::newFromRedirectArray( $text ) ) { - wfDebug( __METHOD__.": showing redirect=no page\n" ); - # Viewing a redirect page (e.g. with parameter redirect=no) - # Don't append the subtitle if this was an old revision - $wgOut->addHTML( $this->viewRedirect( $rt, !$wasRedirected && $this->isCurrent() ) ); - # Parse just to get categories, displaytitle, etc. - $parserOutput = $wgParser->parse( $text, $this->mTitle, $parserOptions ); - $wgOut->addParserOutputNoText( $parserOutput ); - $outputDone = true; - } - if ( $outputDone ) { - $this->showViewFooter(); - $this->viewUpdates(); - wfProfileOut( __METHOD__ ); - return; - } + // Ensure that UI elements requiring revision ID have + // the correct version information. + $wgOut->setRevisionId( $this->getRevIdFetched() ); - # Run the parse, protected by a pool counter - wfDebug( __METHOD__.": doing uncached parse\n" ); - $key = $parserCache->getKey( $this, $parserOptions ); - $poolCounter = PoolCounter::factory( 'Article::view', $key ); - $dirtyCallback = $useParserCache ? 
array( $this, 'tryDirtyCache' ) : false; - $status = $poolCounter->executeProtected( array( $this, 'doViewParse' ), $dirtyCallback ); + // Pages containing custom CSS or JavaScript get special treatment + if( $this->mTitle->isCssOrJsPage() || $this->mTitle->isCssJsSubpage() ) { + wfDebug( __METHOD__.": showing CSS/JS source\n" ); + $this->showCssOrJsPage(); + $outputDone = true; + } else if( $rt = Title::newFromRedirectArray( $text ) ) { + wfDebug( __METHOD__.": showing redirect=no page\n" ); + # Viewing a redirect page (e.g. with parameter redirect=no) + # Don't append the subtitle if this was an old revision + $wgOut->addHTML( $this->viewRedirect( $rt, !$wasRedirected && $this->isCurrent() ) ); + # Parse just to get categories, displaytitle, etc. + $this->mParserOutput = $wgParser->parse( $text, $this->mTitle, $parserOptions ); + $wgOut->addParserOutputNoText( $this->mParserOutput ); + $outputDone = true; + } + break; - if ( !$status->isOK() ) { - # Connection or timeout error - $this->showPoolError( $status ); - wfProfileOut( __METHOD__ ); - return; + case 4: + # Run the parse, protected by a pool counter + wfDebug( __METHOD__.": doing uncached parse\n" ); + $key = $parserCache->getKey( $this, $parserOptions ); + $poolCounter = PoolCounter::factory( 'Article::view', $key ); + $dirtyCallback = $useParserCache ? array( $this, 'tryDirtyCache' ) : false; + $status = $poolCounter->executeProtected( array( $this, 'doViewParse' ), $dirtyCallback ); + + if ( !$status->isOK() ) { + # Connection or timeout error + $this->showPoolError( $status ); + wfProfileOut( __METHOD__ ); + return; + } else { + $outputDone = true; + } + break; + + # Should be unreachable, but just in case... 
+ default: + break 2; + } } + # Now that we've filled $this->mParserOutput, we know whether + # there are any __NOINDEX__ tags on the page + $this->setRobotPolicyForView(); + $this->showViewFooter(); $this->viewUpdates(); wfProfileOut( __METHOD__ ); @@ -961,9 +974,10 @@ } /** - * Get the robot policy to be used for the current action=view request. + * Set the robot policy to be used for the current action=view request. + * @return Array the policy that was set, keyed by 'index' and 'follow' */ - public function getRobotPolicyForView() { + public function setRobotPolicyForView() { global $wgOut, $wgArticleRobotPolicies, $wgNamespaceRobotPolicies; global $wgDefaultRobotPolicy, $wgRequest; @@ -973,32 +987,92 @@ if( !$this->mTitle->isSubpage() ) { $block = new Block(); if( $block->load( $this->mTitle->getText() ) ) { - return 'noindex,nofollow'; + $wgOut->setIndexPolicy( 'noindex' ); + $wgOut->setFollowPolicy( 'nofollow' ); + return array( 'index' => 'noindex', + 'follow' => 'nofollow' ); } } } if( $this->getID() === 0 || $this->getOldID() ) { - return 'noindex,nofollow'; + # Non-articles (special pages etc), and old revisions + $wgOut->setIndexPolicy( 'noindex' ); + $wgOut->setFollowPolicy( 'nofollow' ); + return array( 'index' => 'noindex', + 'follow' => 'nofollow' ); } elseif( $wgOut->isPrintable() ) { # Discourage indexing of printable versions, but encourage following - return 'noindex,follow'; + $wgOut->setIndexPolicy( 'noindex' ); + $wgOut->setFollowPolicy( 'follow' ); + return array( 'index' => 'noindex', + 'follow' => 'follow' ); } elseif( $wgRequest->getInt('curid') ) { # For ?curid=x urls, disallow indexing - return 'noindex,follow'; - } elseif( isset( $wgArticleRobotPolicies[$this->mTitle->getPrefixedText()] ) ) { - return $wgArticleRobotPolicies[$this->mTitle->getPrefixedText()]; - } elseif( isset( $wgNamespaceRobotPolicies[$ns] ) ) { + $wgOut->setIndexPolicy( 'noindex' ); + $wgOut->setFollowPolicy( 'follow' ); + return array( 'index' => 'noindex', + 'follow' => 'follow' ); + } + + # 
Otherwise, construct the policy based on the various config variables. + $policy = array_merge( array( 'index' => 'index', 'follow' => 'follow' ), self::formatRobotPolicy( $wgDefaultRobotPolicy ) ); # index,follow fills in any directive the default policy omits + + if( isset( $wgNamespaceRobotPolicies[$ns] ) ){ + # Honour customised robot policies for this namespace - return $wgNamespaceRobotPolicies[$ns]; - } else { - return $wgDefaultRobotPolicy; + $policy = array_merge( $policy, + self::formatRobotPolicy( $wgNamespaceRobotPolicies[$ns] ) ); } + + if( $this->mTitle->canUseNoindex() && is_object( $this->mParserOutput ) && $this->mParserOutput->getIndexPolicy() ){ + # __INDEX__ and __NOINDEX__ magic words, if allowed. mParserOutput is not an object when a hook, the CSS/JS path or a parser cache miss produced the page. + $policy = array_merge( $policy, + array( 'index' => $this->mParserOutput->getIndexPolicy() ) ); + } + + if( isset( $wgArticleRobotPolicies[$this->mTitle->getPrefixedText()] ) ){ + # (bug 14900) site config can override user-defined __INDEX__ or __NOINDEX__ + $policy = array_merge( $policy, + self::formatRobotPolicy( $wgArticleRobotPolicies[$this->mTitle->getPrefixedText()] ) ); + } + + $wgOut->setIndexPolicy( $policy['index'] ); + $wgOut->setFollowPolicy( $policy['follow'] ); + return $policy; + } /** - * If this request is a redirect view, send "redirected from" subtitle to - * $wgOut. Returns true if the header was needed, false if this is not a + * Converts a String robot policy into an associative array, to allow + * merging of several policies using array_merge(). + * @param $policy Mixed, returns empty array on null/false/'', transparent + * to already-converted arrays, converts String. 
+ * @return associative Array: 'index' => <index policy>, 'follow' => <follow policy>; for String input a key is omitted when that directive is not named + */ + public static function formatRobotPolicy( $policy ){ + if( is_array( $policy ) ){ + return $policy; + } elseif( !$policy ){ + return array(); + } + + $policy = explode( ',', $policy ); + $policy = array_map( 'trim', $policy ); + + $arr = array(); + foreach( $policy as $var ){ + if( in_array( $var, array('index','noindex') ) ){ + $arr['index'] = $var; + } elseif( in_array( $var, array('follow','nofollow') ) ){ + $arr['follow'] = $var; + } + } + return $arr; + } + + /** + * If this request is a redirect view, send "redirected from" subtitle to + * $wgOut. Returns true if the header was needed, false if this is not a * redirect view. Handles both local and remote redirects. */ public function showRedirectedFromHeader() { @@ -3835,11 +3909,11 @@ */ public function outputWikiText( $text, $cache = true, $parserOptions = false ) { global $wgOut; - - $parserOutput = $this->getOutputFromWikitext( $text, $cache, $parserOptions ); - $wgOut->addParserOutput( $parserOutput ); + + $this->mParserOutput = $this->getOutputFromWikitext( $text, $cache, $parserOptions ); + $wgOut->addParserOutput( $this->mParserOutput ); } - + /** * This does all the heavy lifting for outputWikitext, except it returns the parser * output instead of sending it straight to $wgOut. 
Makes things nice and simple for, @@ -3853,7 +3927,7 @@ } $time = -wfTime(); - $parserOutput = $wgParser->parse( $text, $this->mTitle, + $this->mParserOutput = $wgParser->parse( $text, $this->mTitle, $parserOptions, true, true, $this->getRevIdFetched() ); $time += wfTime(); @@ -3863,18 +3937,18 @@ $this->mTitle->getPrefixedDBkey())); } - if( $wgEnableParserCache && $cache && $this && $parserOutput->getCacheTime() != -1 ) { + if( $wgEnableParserCache && $cache && $this && $this->mParserOutput->getCacheTime() != -1 ) { $parserCache = ParserCache::singleton(); - $parserCache->save( $parserOutput, $this, $parserOptions ); + $parserCache->save( $this->mParserOutput, $this, $parserOptions ); } // Make sure file cache is not used on uncacheable content. // Output that has magic words in it can still use the parser cache // (if enabled), though it will generally expire sooner. - if( $parserOutput->getCacheTime() == -1 || $parserOutput->containsOldMagic() ) { + if( $this->mParserOutput->getCacheTime() == -1 || $this->mParserOutput->containsOldMagic() ) { $wgUseFileCache = false; } - $this->doCascadeProtectionUpdates( $parserOutput ); - return $parserOutput; + $this->doCascadeProtectionUpdates( $this->mParserOutput ); + return $this->mParserOutput; } /** Index: trunk/phase3/includes/parser/Parser.php =================================================================== --- trunk/phase3/includes/parser/Parser.php (revision 53416) +++ trunk/phase3/includes/parser/Parser.php (working copy) @@ -3393,28 +3393,43 @@ } if ( isset( $this->mDoubleUnderscores['hiddencat'] ) && $this->mTitle->getNamespace() == NS_CATEGORY ) { $this->mOutput->setProperty( 'hiddencat', 'y' ); - - $containerCategory = Title::makeTitleSafe( NS_CATEGORY, wfMsgForContent( 'hidden-category-category' ) ); - if ( $containerCategory ) { - $this->mOutput->addCategory( $containerCategory->getDBkey(), $this->getDefaultSort() ); - } else { - wfDebug( __METHOD__.": [[MediaWiki:hidden-category-category]] is not a valid 
title!\n" ); - } + $this->addTrackingCategory( 'hidden-category-category' ); } # (bug 8068) Allow control over whether robots index a page. # # FIXME (bug 14899): __INDEX__ always overrides __NOINDEX__ here! This # is not desirable, the last one on the page should win. - if( isset( $this->mDoubleUnderscores['noindex'] ) ) { + if( isset( $this->mDoubleUnderscores['noindex'] ) && $this->mTitle->canUseNoindex() ){ $this->mOutput->setIndexPolicy( 'noindex' ); - } elseif( isset( $this->mDoubleUnderscores['index'] ) ) { + $this->addTrackingCategory( 'noindex-category' ); + } + if( isset( $this->mDoubleUnderscores['index'] ) && $this->mTitle->canUseNoindex() ){ $this->mOutput->setIndexPolicy( 'index' ); + $this->addTrackingCategory( 'index-category' ); } + wfProfileOut( __METHOD__ ); return $text; } /** + * Add a tracking category, getting the title from a system message, + * or print a debug message if the title is invalid. + * @param $msg String message key; the text of that message (via wfMsgForContent) is used as the category name + * @return Bool whether the addition was successful + */ + protected function addTrackingCategory( $msg ){ + $containerCategory = Title::makeTitleSafe( NS_CATEGORY, wfMsgForContent( $msg ) ); + if ( $containerCategory ) { + $this->mOutput->addCategory( $containerCategory->getDBkey(), $this->getDefaultSort() ); + return true; + } else { + wfDebug( __METHOD__.": [[MediaWiki:$msg]] is not a valid title!\n" ); + return false; + } + } + + /** * This function accomplishes several tasks: * 1) Auto-number headings if that option is enabled * 2) Add an [edit] link to sections for users who have enabled the option and can edit the page Index: trunk/phase3/includes/OutputPage.php =================================================================== --- trunk/phase3/includes/OutputPage.php (revision 53416) +++ trunk/phase3/includes/OutputPage.php (working copy) @@ -451,21 +451,13 @@ * @return null */ public function setRobotPolicy( $policy ) { - $policy = explode( ',', $policy ); - $policy = array_map( 'trim', $policy ); + 
$policy = Article::formatRobotPolicy( $policy ); # directives absent from $policy leave the current index/follow setting untouched - # The default policy is follow, so if nothing is said explicitly, we - # do that. - if( in_array( 'nofollow', $policy ) ) { - $this->mFollowPolicy = 'nofollow'; - } else { - $this->mFollowPolicy = 'follow'; + if( isset( $policy['index'] ) ){ + $this->setIndexPolicy( $policy['index'] ); } - - if( in_array( 'noindex', $policy ) ) { - $this->mIndexPolicy = 'noindex'; - } else { - $this->mIndexPolicy = 'index'; + if( isset( $policy['follow'] ) ){ + $this->setFollowPolicy( $policy['follow'] ); } } @@ -738,17 +730,6 @@ $this->mNewSectionLink = $parserOutput->getNewSection(); $this->mHideNewSectionLink = $parserOutput->getHideNewSection(); - if( is_null( $wgExemptFromUserRobotsControl ) ) { - $bannedNamespaces = $wgContentNamespaces; - } else { - $bannedNamespaces = $wgExemptFromUserRobotsControl; - } - if( !in_array( $this->getTitle()->getNamespace(), $bannedNamespaces ) ) { - # FIXME (bug 14900): This overrides $wgArticleRobotPolicies, and it - # shouldn't - $this->setIndexPolicy( $parserOutput->getIndexPolicy() ); - } - $this->addKeywords( $parserOutput ); $this->mParseWarnings = $parserOutput->getWarnings(); if ( $parserOutput->getCacheTime() == -1 ) { Index: trunk/phase3/includes/Title.php =================================================================== --- trunk/phase3/includes/Title.php (revision 53416) +++ trunk/phase3/includes/Title.php (working copy) @@ -3646,4 +3646,21 @@ } return $this->mBacklinkCache; } + + /** + * Whether the magic words __INDEX__ and __NOINDEX__ function for + * this page. + * @return Bool false when the page's namespace is listed in $wgExemptFromUserRobotsControl (or in $wgContentNamespaces when that setting is null) + */ + public function canUseNoindex(){ + global $wgContentNamespaces, + $wgExemptFromUserRobotsControl; + + $bannedNamespaces = is_null( $wgExemptFromUserRobotsControl ) + ? 
$wgContentNamespaces + : $wgExemptFromUserRobotsControl; + + return !in_array( $this->mNamespace, $bannedNamespaces ); + + } } Index: trunk/phase3/includes/DefaultSettings.php =================================================================== --- trunk/phase3/includes/DefaultSettings.php (revision 53416) +++ trunk/phase3/includes/DefaultSettings.php (working copy) @@ -3875,8 +3875,11 @@ $wgExceptionHooks = array(); /** - * Page property link table invalidation lists. Should only be set by exten- - * sions. + * Page property link table invalidation lists. When a page property + * changes, this may require other link tables to be updated (eg + * adding __HIDDENCAT__ means the hiddencat tracking category will + * have been added, so the categorylinks table needs to be rebuilt). + * This array can be added to by extensions. */ $wgPagePropLinkInvalidations = array( 'hiddencat' => 'categorylinks',