From e4116795fc7082ccafc8a1979e22d323dac251f7 Mon Sep 17 00:00:00 2001
From: Matthew Flaschen <mflaschen@wikimedia.org>
Date: Tue, 28 Jul 2015 20:16:48 -0400
Subject: [PATCH] SECURITY: Add script to evict Flow from TextExtracts

We'll use this now that the quick fix to remove edit tokens is
in, and again when the next one is in (to remove all Flow script
from mw-content-text).

Bug: T107170
Change-Id: Id7c208297fc1eace2893897bf4baae180b686142
---
 maintenance/FlowEvictTextExtracts.php | 111 ++++++++++++++++++++++++++++++++++
 1 file changed, 111 insertions(+)
 create mode 100644 maintenance/FlowEvictTextExtracts.php

diff --git a/maintenance/FlowEvictTextExtracts.php b/maintenance/FlowEvictTextExtracts.php
new file mode 100644
index 0000000..4c6f36b
--- /dev/null
+++ b/maintenance/FlowEvictTextExtracts.php
@@ -0,0 +1,111 @@
+<?php
+
+use Flow\Container;
+
+require_once ( getenv( 'MW_INSTALL_PATH' ) !== false
+	? getenv( 'MW_INSTALL_PATH' ) . '/maintenance/Maintenance.php'
+	: dirname( __FILE__ ) . '/../../../maintenance/Maintenance.php' );
+require_once( __DIR__ . '/../../Echo/includes/BatchRowUpdate.php' );
+
+/**
+ * Evicts cached TextExtracts values for Flow pages, because they may contain
+ * private data.
+ *
+ * Every page whose content model is 'flow-board' is visited, and each
+ * ( intro-only, plaintext ) variant of its extract cache key that is present
+ * in $wgMemc is deleted.
+ *
+ * @ingroup Maintenance
+ */
+class FlowEvictTextExtracts extends Maintenance {
+	public function __construct() {
+		parent::__construct();
+
+		$this->mDescription = 'Evicts Flow boards and topics from TextExtracts, because they may contain private data';
+		$this->setBatchSize( 100 );
+	}
+
+	/**
+	 * Find and delete the cached extracts of every Flow page.
+	 *
+	 * Progress goes through Maintenance::output() and failed deletions through
+	 * Maintenance::error(), so the script honours --quiet and never reports an
+	 * eviction the cache backend did not confirm.
+	 */
+	public function execute() {
+		global $wgMemc;
+
+		$dbFactory = Container::get( 'db.factory' );
+
+		// Unused objects to fulfill signature requirements
+		$apiMain = new ApiMain();
+		$apiQuery = new ApiQuery( $apiMain, '' );
+		$emptyConfig = GlobalVarConfig::newInstance();
+		$extractsApi = new TextExtracts\ApiQueryExtracts( $apiQuery, '', $emptyConfig );
+
+		// cacheKey() and $params are presumably not public, hence reflection
+		$cacheKeyMethod = new ReflectionMethod( 'TextExtracts\\ApiQueryExtracts::cacheKey' );
+		$cacheKeyMethod->setAccessible( true );
+
+		$paramsProperty = new ReflectionProperty( 'TextExtracts\\ApiQueryExtracts', 'params' );
+		$paramsProperty->setAccessible( true );
+
+		// This could be reused for a stand-alone tool to list all Flow boards
+		$iterator = new EchoBatchRowIterator( $dbFactory->getWikiDB( DB_SLAVE ), 'page', 'page_id', $this->mBatchSize );
+		$iterator->setFetchColumns( array( 'page_namespace', 'page_title' ) );
+		$iterator->addConditions( array( 'page_content_model' => 'flow-board' ) );
+
+		// Each entry is ( $introOnly, $plaintext ), the two inputs getCacheKey() varies
+		$cacheKeyPermutations = array(
+			array( false, false ),
+			array( false, true ),
+			array( true, false ),
+			array( true, true )
+		);
+
+		foreach ( $iterator as $batch ) {
+			foreach ( $batch as $row ) {
+				$title = Title::newFromRow( $row );
+				$wikiPage = new WikiPage( $title );
+				foreach ( $cacheKeyPermutations as $permutation ) {
+					$cacheKey = self::getCacheKey( $cacheKeyMethod, $paramsProperty, $extractsApi, $wikiPage, $permutation[0], $permutation[1] );
+					if ( $wgMemc->get( $cacheKey ) === false ) {
+						// Nothing cached under this key
+						continue;
+					}
+
+					$this->output( "Found value for {$title->getPrefixedDBkey()}\n" );
+					if ( $wgMemc->delete( $cacheKey ) ) {
+						$this->output( "Deleted $cacheKey\n" );
+					} else {
+						// Never claim an eviction the cache backend did not confirm
+						$this->error( "Failed to delete $cacheKey" );
+					}
+				}
+			}
+		}
+	}
+
+	/**
+	 * Compute the extract cache key for one page and one option combination.
+	 *
+	 * @param ReflectionMethod $cacheKeyMethod Accessible ApiQueryExtracts::cacheKey
+	 * @param ReflectionProperty $paramsProperty Accessible ApiQueryExtracts::$params
+	 * @param TextExtracts\ApiQueryExtracts $extractsApi Instance to compute the key on;
+	 *  its 'plaintext' param is overwritten
+	 * @param WikiPage $wikiPage Page the key is for
+	 * @param bool $introOnly Passed through to cacheKey()
+	 * @param bool $plaintext Stored as the instance's 'plaintext' param before the call
+	 * @return string Key produced by cacheKey() (assumed to be a string; used as a $wgMemc key)
+	 */
+	protected static function getCacheKey( $cacheKeyMethod, $paramsProperty, $extractsApi, $wikiPage, $introOnly, $plaintext ) {
+		$params = $paramsProperty->getValue( $extractsApi );
+		$params['plaintext'] = $plaintext;
+		$paramsProperty->setValue( $extractsApi, $params );
+
+		return $cacheKeyMethod->invoke( $extractsApi, $wikiPage, $introOnly );
+	}
+}
+
+$maintClass = 'FlowEvictTextExtracts';
+require_once( RUN_MAINTENANCE_IF_MAIN );
-- 
2.1.4

