addDescription( 'Check articles to see if they are indexed by Elasticsearch' );
		// $name, $description, $required = false, $withArg = false, $shortName = false, $multiOccurrence = false
		$this->addOption( 'dry-run', 'Do not perform any corrections/edits with "-n or --dry-run"', false, false, 'n' );
		$this->addOption('verbose', 'List the titles with "-v or --verbose"', false, false, 'v' );
		$this->setBatchSize( 1000 ); // parent method; adds --batch-size option
	}

	/**
	 * Walk the `page` table in batches and look each page up in the
	 * CirrusSearch index. A page whose lookup succeeds but returns no
	 * document is counted as "hidden"; unless --dry-run is given, such a
	 * page is re-saved with its current content so that it gets another
	 * chance to be indexed.
	 *
	 * Options read:
	 *  - dry-run: only count/report, never edit.
	 *  - verbose: print the title text of every hidden page.
	 *  - batch size: number of rows fetched per query (getBatchSize()).
	 *
	 * Output: per-page messages while scanning, then a summary line with the
	 * number of hidden pages and the number of pages scanned.
	 */
	public function execute() {
		global $wgUser;

		// Highest page_id seen so far; the next batch starts after it.
		$lastPageId = 0;
		// How many page rows were scanned.
		$numArticles = 0;
		// How many pages were missing from the search index.
		$numBad = 0;
		// How many index lookups failed outright (page state unknown).
		$numLookupFailures = 0;

		// Do not edit anything.
		$this->mNope = (bool)$this->hasOption( 'dry-run' );
		// Print a list (and stats).
		$this->mVerbose = (bool)$this->hasOption( 'verbose' );

		$config = MediaWikiServices::getInstance()->getConfigFactory()->makeConfig( 'CirrusSearch' );
		$conn = new Connection( $config );
		$searcher = new Searcher( $conn, 0, 0, $config, [], $wgUser );
		$db = wfGetDB( DB_REPLICA );

		do {
			// Page on page_id rather than page_title: the same title text can
			// exist in several namespaces, so "page_title > last title" drops
			// every remaining same-titled row whenever a batch boundary falls
			// between them. page_id is unique, so no row is skipped or repeated.
			// $table, $vars, $conds = '', $fname = __METHOD__, $options = [], $join_conds = []
			$res = $db->select(
				'page',
				[ 'page_id', 'page_namespace', 'page_title' ],
				[ 'page_id > ' . (int)$lastPageId ],
				__METHOD__,
				[ 'ORDER BY' => 'page_id', 'LIMIT' => $this->getBatchSize() ]
			);

			foreach ( $res as $row ) {
				$numArticles++;
				$lastPageId = (int)$row->page_id;

				$title = Title::makeTitleSafe( $row->page_namespace, $row->page_title );
				if ( $title === null ) {
					$this->output( "unable to create title object from "
						. "{$row->page_namespace}: {$row->page_title}\n" );
					continue;
				}

				// The row already carries the page id, so no per-title lookup is needed.
				$docId = $config->makeId( (int)$row->page_id );
				$esSources = $searcher->get( [ $docId ], true );

				// A failed lookup says nothing about whether the page is indexed,
				// and its value is not guaranteed to be countable. Report it and
				// move on instead of treating the page as missing.
				if ( !$esSources->isOK() ) {
					$numLookupFailures++;
					$this->output( "search lookup failed for " . $title->getText() . "\n" );
					continue;
				}

				// We erroneously relied on if ( !$esSources->isOK() )
				// until it was discovered that
				// the bad articles were already marked 'OK'.
				// So an OK status with zero results is what marks a hidden page.
				if ( count( $esSources->value ) ) {
					continue;
				}

				$numBad++;
				if ( $this->mVerbose ) {
					$this->output( $title->getText() . "\n" );
				}
				if ( $this->mNope ) {
					continue;
				}

				$page = new WikiPage( $title );
				$content = $page->getContent();
				if ( $content === null ) {
					// Nothing to re-save (e.g. the page vanished since the row was read).
					$this->output( $title->getText() . " has no content, skipped\n" );
					continue;
				}

				// Re-save the current content unchanged.
				$editStatus = $page->doEditContent(
					$content,
					'This changes nothing',
					EDIT_UPDATE,
					false,
					$wgUser
				);
				if ( $editStatus->isOK() ) {
					$this->output( $title->getText() . " fixed\n" );
				} else {
					$this->output( $title->getText() . " could not be fixed\n" );
				}
			}
		} while ( $res->numRows() );

		if ( $numLookupFailures > 0 ) {
			$this->output( "$numLookupFailures search lookups failed; those pages were not checked.\n" );
		}
		$this->output( "Found $numBad hidden articles out of $numArticles.\n\n" );
	}
}

$maintClass = PurgeUnindexedPages::class;
require_once RUN_MAINTENANCE_IF_MAIN;