From 0c978485b389f649164a046c115801dc236e516b Mon Sep 17 00:00:00 2001 From: Brian Wolff Date: Tue, 31 May 2016 23:36:42 -0400 Subject: [PATCH] SECURITY: XSS and cache pollution issues in DoubleWiki This fixes the following issues: * In alignment
 tags, if the search text had a " in it, you
   could break out of an attribute and add an event handler (XSS)
* If any HTML attributes can contain 

or tags, you can cause the table row to be broken in the middle of the html attribute, leading to XSS. (Most of modern MW does not allow this) * If the page contains a parser function or similar tag with private information on it, which calls $parser->disableCache() in order to keep the info per user, the way this extension does caching could allow a malicious user to find this private information. This is exasperated by the fact that the extension can perform DoubleWiki matches from edit pages, allowing a different origin site to load a page in the background which puts the secret info in the cache with no user interaction beyond browsing to the malicious site (In particular, this can leak rollback tokens on some versions of MediaWiki, and it can leak suppressed usernames via {{special:Listusers}}. I also modified some code to be more paranoid about escaping (for example, escaping language names, even though they should not be under user control). Bug: T131199 --- DoubleWiki_body.php | 180 ++++++++++++++++++++++++++++++++++++++++++++++++++-- i18n/en.json | 7 +- i18n/qqq.json | 5 +- 3 files changed, 182 insertions(+), 10 deletions(-) diff --git a/DoubleWiki_body.php b/DoubleWiki_body.php index 77ab2fc..1f98dac 100644 --- a/DoubleWiki_body.php +++ b/DoubleWiki_body.php @@ -15,16 +15,39 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # http://www.gnu.org/copyleft/gpl.html -class DoubleWiki { +use MediaWiki\Logger\LoggerFactory; +use Psr\Log\LoggerAwareInterface; +use Psr\Log\LoggerInterface; + +class DoubleWiki implements LoggerAwareInterface { + /** * Tags that must be closed. (list copied from Sanitizer.php) */ public $tags = '/<\/?(b|del|i|ins|u|font|big|small|sub|sup|h1|h2|h3|h4|h5|h6|cite|code|em|s|strike|strong|tt|tr|td|var|div|center|blockquote|ol|ul|dl|table|caption|pre|ruby|rt|rb|rp|p|span)([\s](.*?)>|>)/i'; /** + * Determine if the page has html type things which make it + * potentially unsafe to do these types of regex replacements on. + * Most of MW does not output html that will match this. + */ + const UNSAFE_ATTRIBS = '/<[^><]*logger = $logger; + } + + public function __construct() { + $this->setLogger( LoggerFactory::getInstance( 'doublewiki' ) ); + } + + /** * Read the list of matched phrases and add tags to the html output. */ function addMatchingTags ( &$text, $lang ) { + $lang = htmlspecialchars( $lang ); $pattern = "/
\n*
(.*?)<\/pre>\n*<\/div>/is";
 		$m = array();
 		if ( ! preg_match( $pattern, $text, $m ) ) {
@@ -35,7 +58,26 @@ class DoubleWiki {
 		$items = array();
 		preg_match_all( $line_pattern, $m[1], $items, PREG_SET_ORDER );
 		foreach ( $items as $n => $i ) {
-			$text = str_replace( $i[1], "" . $i[1], $text );
+			if ( preg_match( '/^[^<>]*(?:<[^<>]*>[^<>]*)*$/', $i[1] ) !== 1 ) {
+				// unclosed html, skip.
+				$this->logger->warning( "Potentially unsafe replacement text '{$text}' skipped",
+					[
+						"text" => $i[1],
+						"replacement-title" => $i[2],
+						"method" => __METHOD__,
+						"target-lang" => $lang
+					]
+				);
+				continue;
+			}
+			// If this replaces inside an html tag, the check in
+			// addMatchedText() for self::UNSAFE_ATTRIBS should catch it.
+			$id = "dw-" . htmlspecialchars( $n );
+			// It is particularly important that " is escaped, otherwise this
+			// won't match the regex later on, and won't be removed from the page
+			// source.
+			$title = Sanitizer::escapeHtmlAllowEntities( $i[2] );
+			$text = str_replace( $i[1], "" . $i[1], $text );
 		}
 	}
 
@@ -46,6 +88,62 @@ class DoubleWiki {
 	}
 
 	/**
+	 * Verify that html meets certain assumptions
+	 *
+	 * Includes that translation only contains a text body
+	 * and that both of them do not contain < or > in attributes.
+	 *
+	 * @param $text String HTML contents of text in this language
+	 * @param $translation String HTML contents of translation
+	 * @param $curTitle String Prefixed db key for current page
+	 * @param $translationTitle Prefixed db key including interwiki for translation
+	 * @throws FatalError If unsafe input detected.
+	 */
+	private function verifyTextIsSane( $text, $translation, $curTitle, $translationTitle ) {
+		if ( preg_match( '/<(?:html|!doctype|head)/i', $translation ) !== 0 ) {
+			// This can happen for example, if you have an interwiki to a special
+			// page. This is probably not actually unsafe, but would give garbled
+			// output, and is almost certainly not intentional. Abort as precaution.
+			$this->logger->info( "DoubleWiki aborted due to full html in {page}",
+				[
+					"method" => __METHOD__,
+					"page" => $translationTitle,
+					"frompage" => $curTitle
+				]
+			);
+			$msg = wfMessage( 'doublewiki-translationfullhtml' )
+				->params( $translationTitle )
+				->parse();
+			throw new FatalError( $msg );
+		}
+
+		$unsafePage = false;
+		if ( preg_match( self::UNSAFE_ATTRIBS, $text ) !== 0 ) {
+			$unsafePage = $curTitle;
+		} elseif ( preg_match( self::UNSAFE_ATTRIBS, $translation ) !== 0 ) {
+			$unsafePage = $translationTitle;
+		}
+
+		if ( $unsafePage !== false ) {
+			// It is assumed that this will basically
+			// never happen in a non-malicious situation
+			$this->logger->warning(
+				"Aborting DoubleWiki due to unsafe HTML on {unsafe} during match between {left} and {right}",
+				[
+					"unsafe" => $unsafePage,
+					"left" => $curTitle,
+					"right" => $translationTitle,
+					"method" => __METHOD__
+				]
+			);
+			$msg = wfMessage( 'doublewiki-unsafeattribute' )
+				->params( $unsafePage )
+				->parse();
+			throw new FatalError( $msg );
+		}
+	}
+
+	/**
 	 * Hook function called with &match=lang
 	 * Transform $text into a bilingual version
 	 * @param $out OutputPage
@@ -55,11 +153,27 @@ class DoubleWiki {
 	function addMatchedText( &$out, &$text ) {
 		global $wgContLang, $wgContLanguageCode, $wgMemc, $wgDoubleWikiCacheTime;
 
-		$match_request = $out->getRequest()->getText( 'match' );
-		if ( $match_request === '' ) {
+		$req = $out->getRequest();
+		$action = $req->getVal( 'action', 'view' );
+		if ( $action !== 'view' && $action !== 'render' ) {
+			// Sounds fishy. Bail
+			return true;
+		}
+
+		if ( $out->getTitle()->inNamespace( NS_SPECIAL ) ) {
+			// Sounds fishy, bail.
+			// Also probably wouldn't work anyhow
+			return true;
+		}
+
+		$match_request = $req->getText( 'match' );
+		if ( $match_request === ''
+			// This is paranoia, since checking it matches an interlanguage
+			// link should eliminate evil stuff, but paranoia never hurt.
+			|| htmlspecialchars( $match_request ) !== $match_request
+		) {
 			return true;
 		}
-		$this->addMatchingTags ( $text, $match_request );
 
 		$langLinks = $out->getLanguageLinks();
 		foreach( $langLinks as $l ) {
@@ -78,16 +192,64 @@ class DoubleWiki {
 					$languageName = Language::fetchLanguageName( $iw );
 					$myLanguage = Language::fetchLanguageName( $wgContLang->getCode() );
 					$translation = Http::get( wfAppendQuery( $url, array( 'action' => 'render' ) ) );
+					// This is hacky. Giving null as the argument means return the
+					// current value without setting (See docs wfSetVar()).
+					// enableClientCache() will be set to false if the page contains
+					// something that is very per-user, like {{Special:RecentChanges}}.
+					// In which case we don't want to use the current user's version. (Could contain tokens)
+					// Maybe we should do this in all cases? Maybe we should use
+					// ParserCache::singleton()->get() w/ canonical parser options instead?
+					$po = null;
+					if ( $out->canUseWikiPage() ) {
+						$po = $out->getWikiPage()->makeParserOptions( $out );
+						$canonicalPo = $out->getWikiPage()->makeParserOptions( 'canonical' );
+						// caching already varries by user lang.
+						$canonicalPo->setUserLang( $out->getLanguage() );
+					}
+					if ( $out->enableClientCache( null ) === false
+						|| !$po
+						|| !$po->matches( $canonicalPo )
+					) {
+						$curPageUrl = $out->getTitle()->getFullUrl(
+							[
+								'action' => 'render',
+								'uselang' => $out->getLanguage()->getCode()
+							],
+							false,
+							PROTO_CANONICAL
+						);
+						$res = Http::get( $curPageUrl );
+						if ( $res === null ) {
+							// Wiki can't reach itself??
+							$this->logger->error( "Could not fetch {curPageUrl}",
+								[ 'method' => __METHOD__, 'curPageUrl' => $curPageUrl ]
+							);
+							$msg = wfMessage( 'doublewiki-cannotfetchself' )->parse();
+							throw new FatalError( $msg );
+						}
+						$text = $res;
+					}
 
 					if ( $translation !== null ) {
+						$this->addMatchingTags( $text, $match_request );
+
+						// Important that this be done after
+						// addMatchingTags()
+						$this->verifyTextIsSane(
+							$text,
+							$translation,
+							$out->getTitle()->getPrefixedDbKey(),
+							$nt->getPrefixedDbKey()
+						);
 						/**
 						 * first find all links that have no 'class' parameter.
 						 * these links are local so we add '?match=xx' to their url,
 						 * unless it already contains a '?'
 						 */
+						$langCode = htmlspecialchars( $wgContLanguageCode );
 						$translation = preg_replace(
 							"/\s])([^\>\s]*))*\>/i",
-							"", $translation );
+							"", $translation );
 						// now add class='extiw' to these links
 						$translation = preg_replace(
 							"/\s])([^\>\s]*))*\>/i",
@@ -113,7 +275,6 @@ class DoubleWiki {
 						$match_request_lang = wfGetLangObj( $match_request );
 						$text = $this->matchColumns( $text, $myLanguage, $myURL, $wgContLang,
 									$translation, $languageName, $url, $match_request_lang );
-
 						$wgMemc->set( $key, $text, $wgDoubleWikiCacheTime );
 					}
 				}
@@ -164,6 +325,9 @@ class DoubleWiki {
 				$found = true;
 			} else {
 				// look for requested tag in the text
+				// This is safe, since we only select up to next
+				// 

or and the check in addMatchedText + // fatals if there is attributes with html tags inside. $a = strpos ( $right_text, $tag ); if ( $a ) { $found = true; @@ -212,6 +376,8 @@ class DoubleWiki { // format table head and return results $left_url = htmlspecialchars( $left_url ); $right_url = htmlspecialchars( $right_url ); + $left_title = htmlspecialchars( $left_title ); + $right_title = htmlspecialchars( $right_title ); $head = " diff --git a/i18n/en.json b/i18n/en.json index 0acf273..64d6d3a 100644 --- a/i18n/en.json +++ b/i18n/en.json @@ -2,5 +2,8 @@ "@metadata": { "authors": [] }, - "doublewiki-desc": "Displays a page and its translation from another wiki on two columns of the same page" -} \ No newline at end of file + "doublewiki-desc": "Displays a page and its translation from another wiki on two columns of the same page", + "doublewiki-unsafeattribute": "Cannot show DoubleWiki match for this page due to an HTML attribute containing an unescaped < on [[:$1]]", + "doublewiki-translationfullhtml": "Cannot show DoubleWiki, since the page [[:$1]] appears to be a full HTML document and not a wiki page", + "doublewiki-cannotfetchself": "Cannot show DoubleWiki since the current page cannot be fetched. Check that the wiki is network accessible to itself." +} diff --git a/i18n/qqq.json b/i18n/qqq.json index dcb8abf..b466d6b 100644 --- a/i18n/qqq.json +++ b/i18n/qqq.json @@ -4,5 +4,8 @@ "Raymond" ] }, - "doublewiki-desc": "Extension description displayed on [[Special:Version]]" + "doublewiki-desc": "Extension description displayed on [[Special:Version]]", + "doublewiki-unsafeattribute": "Shown if DoubleWiki is disabled due to suspicious html in either the current page or the page being compared against. $1 - Page name with the unsafe html, possibly including an interlanguage prefix.", + "doublewiki-translationfullhtml": "Shown if the page being compared does not appear to be a wikipage. For example if the page being compared against is a special page, this error will be shown. $1 Name of page being compared, including interwiki prefix (e.g. 'de:Special:BlankPage')", + "doublewiki-cannotfetchself": "{{optional}} Shown if the current page is inaccessible over HTTP. Since it is very unlikely for this error to happen, it is probably optional to translate it." } -- 2.0.1